public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch] 1/3 Add new builtin __builtin_flush_icache().
@ 2007-07-01  5:01 David Daney
  2007-07-01  5:05 ` [Patch] 2/3 MIPS support for " David Daney
                   ` (3 more replies)
  0 siblings, 4 replies; 34+ messages in thread
From: David Daney @ 2007-07-01  5:01 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1607 bytes --]

This series of patches adds a new built-in __builtin_flush_icache.
The first part is the target independent support.  The second adds
support to the MIPS back-end.  And finally the third changes the MIPS
libffi support to use the new builtin instead of making a library
call.

The prototype for the new built-in is:

void __builtin_flush_icache(void *location, size_t len);

As one might expect, it flushes the region of memory pointed to by
location with length len from the instruction cache.  The
new builtin and the flush_icache insn would be the preferred mechanism
to flushing the i-cache.

Looking at the various targets, it seems possible that in addition my
mips support, alpha, arc, arm, ia64, m32r, m68k, pa, rs6000, score,
sparc, and xtensa could do something useful for __builtin_flush_icache.

Bootstrapped and tested on x86_64-unknown-linux-gnu all default
languages with no regressions.

Also tested on:
 x86_64 cross to mipsel-linux --with-arch=mips32
 i686   cross to mipsel-linux --with-arch=mips32r2

with no regressions.

OK to commit?
2007-06-30  David Daney  <ddaney@avtrex.com>

    * builtins.def (BUILT_IN_FLUSH_ICACHE): New builtin.
    * builtins.c (expand_builtin_flush_icache): New function.
    (expand_builtin): Call expand_builtin_flush_icache for
    BUILT_IN_FLUSH_ICACHE.
    (fold_builtin_flush_icache): New function.
    (fold_builtin_2): Call fold_builtin_flush_icache for
    BUILT_IN_FLUSH_ICACHE.
    * testsuite/gcc.dg/builtins-64.c: New test.
    * testsuite/gcc.dg/builtins-65.c: New test.
    * doc/extend.texi (__builtin_flush_icache): Document new builtin.



[-- Attachment #2: flush-cache1.diff --]
[-- Type: text/x-patch, Size: 6150 bytes --]

Index: builtins.def
===================================================================
--- builtins.def	(revision 125997)
+++ builtins.def	(working copy)
@@ -1,6 +1,6 @@
 /* This file contains the definitions and documentation for the
    builtins used in the GNU compiler.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -640,6 +640,7 @@ DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFS, "f
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFSIMAX, "ffsimax", BT_FN_INT_INTMAX, ATTR_CONST_NOTHROW_LIST)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFSL, "ffsl", BT_FN_INT_LONG, ATTR_CONST_NOTHROW_LIST)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFSLL, "ffsll", BT_FN_INT_LONGLONG, ATTR_CONST_NOTHROW_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_FLUSH_ICACHE, "flush_icache", BT_FN_VOID_PTR_SIZE, ATTR_NOTHROW_LIST)
 DEF_EXT_LIB_BUILTIN        (BUILT_IN_FORK, "fork", BT_FN_PID, ATTR_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_FRAME_ADDRESS, "frame_address", BT_FN_PTR_UINT, ATTR_NULL)
 DEF_LIB_BUILTIN        (BUILT_IN_FREE, "free", BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) 
Index: builtins.c
===================================================================
--- builtins.c	(revision 125997)
+++ builtins.c	(working copy)
@@ -5513,6 +5513,39 @@ expand_builtin_profile_func (bool exitp)
   return const0_rtx;
 }
 
+/* Expand a call to _builtin_flush_icache.  If the target does not
+   support the operation, it is a nop. */
+static rtx
+expand_builtin_flush_icache (tree exp)
+{
+  tree location, len;
+  rtx location_rtx, len_rtx;
+
+  if (!validate_arglist (exp, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+    return NULL_RTX;
+
+  location = CALL_EXPR_ARG (exp, 0);
+  len = CALL_EXPR_ARG (exp, 1);
+
+  /* Evaluate LOCATION in case it has side-effects. */
+  location_rtx = expand_expr (location, NULL_RTX, Pmode, EXPAND_NORMAL);
+  location_rtx = convert_memory_address (Pmode, location_rtx);
+
+  /* If the LEN parameter is zero, return. */
+  if (integer_zerop (len))
+    return const0_rtx;
+
+  len_rtx = expand_normal (len);
+
+#ifdef HAVE_flush_icache
+  if (HAVE_flush_icache)
+    {
+      emit_insn (gen_flush_icache (location_rtx, len_rtx));
+    }
+#endif
+  return const0_rtx;
+}
+
 /* Given a trampoline address, make sure it satisfies TRAMPOLINE_ALIGNMENT.  */
 
 static rtx
@@ -6590,6 +6623,9 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_PROFILE_FUNC_EXIT:
       return expand_builtin_profile_func (true);
 
+    case BUILT_IN_FLUSH_ICACHE:
+      return expand_builtin_flush_icache (exp);
+
     case BUILT_IN_INIT_TRAMPOLINE:
       return expand_builtin_init_trampoline (exp);
     case BUILT_IN_ADJUST_TRAMPOLINE:
@@ -9558,6 +9594,22 @@ fold_builtin_unordered_cmp (tree fndecl,
 		      fold_build2 (code, type, arg0, arg1));
 }
 
+/* Fold function call to builtin flush_icache.  Return
+   NULL_TREE if no simplification can be made.  */
+
+static tree
+fold_builtin_flush_icache (tree location, tree len)
+{
+  if (! validate_arg (location, POINTER_TYPE)
+      || ! validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  /* If len parameter is zero, do nothing. */
+  if (integer_zerop (len))
+    return omit_one_operand (void_type_node, location, len);
+  return NULL_TREE;
+}
+
 /* Fold a call to built-in function FNDECL with 0 arguments.
    IGNORE is true if the result of the function call is ignored.  This
    function returns NULL_TREE if no simplification was possible.  */
@@ -10068,6 +10120,8 @@ fold_builtin_2 (tree fndecl, tree arg0, 
     case BUILT_IN_VFPRINTF:
       return fold_builtin_fprintf (fndecl, arg0, arg1, NULL_TREE,
 				   ignore, fcode);
+    case BUILT_IN_FLUSH_ICACHE:
+      return fold_builtin_flush_icache (arg0, arg1);
 
     default:
       break;
Index: testsuite/gcc.dg/builtins-64.c
===================================================================
--- testsuite/gcc.dg/builtins-64.c	(revision 0)
+++ testsuite/gcc.dg/builtins-64.c	(revision 0)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+void f()
+{
+  char *memory = __builtin_alloca(40);
+  __builtin_flush_icache(memory, 40);
+}
+
+/* { dg-final { scan-tree-dump "__builtin_flush_icache" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: testsuite/gcc.dg/builtins-65.c
===================================================================
--- testsuite/gcc.dg/builtins-65.c	(revision 0)
+++ testsuite/gcc.dg/builtins-65.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+void f()
+{
+  char *memory = __builtin_alloca(40);
+  /* __builtin_flush_icache should be folded away for size == 0. */
+  __builtin_flush_icache(memory, 0);
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_flush_icache" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 125997)
+++ doc/extend.texi	(working copy)
@@ -6086,6 +6086,19 @@ if (__builtin_expect (ptr != NULL, 1))
 when testing pointer or floating-point values.
 @end deftypefn
 
+@deftypefn {Built-in Function} void __builtin_flush_icache (const void *@var{addr}, size_t @var{len})
+This functions is used to flush the processor's instruction cache for
+the region of memory of length @var{len} at location @var{addr}.  Some
+targets require that the instruction cache be flushed, after modifying
+memory containing code, in order to obtain deterministic behavior.
+
+If the target does not require instruction cache flushes or otherwise
+does not have support for @code{__builtin_flush_icache}, the address
+expression is evaluated if it includes side effects but no other code
+is generated.  Currently MIPS is the only target with support for
+@code{__builtin_flush_icache}.
+@end deftypefn
+
 @deftypefn {Built-in Function} void __builtin_prefetch (const void *@var{addr}, ...)
 This function is used to minimize cache-miss latency by moving data into
 a cache before it is accessed.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-01  5:01 [Patch] 1/3 Add new builtin __builtin_flush_icache() David Daney
@ 2007-07-01  5:05 ` David Daney
  2007-07-01 10:56   ` Richard Sandiford
  2007-07-01  5:11 ` [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache David Daney
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-01  5:05 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1336 bytes --]

This is the second part of the __builtin_flush_icache patch.  It adds
support for MIPS.

flush_icache is expanded to either a library call or an in-line cache
flushing sequence if the ISA supports it.

It it is expanded in-line it is done by two insns (synci_loop and
clear_hazard).  A possible follow on optimization would be to fold the
clear_hazard into the next jump instruction that follows the synci_loop.

I also changed INITIALIZE_TRAMPOLINE to emit flush_icache instead of
the library call it used previously.

Bootstrapped and tested on x86_64-unknown-linux-gnu all default
languages with no regressions.

Also tested on:
 x86_64 cross to mipsel-linux --with-arch=mips32
 i686   cross to mipsel-linux --with-arch=mips32r2

with no regressions.

OK to commit?

2007-06-30  David Daney  <ddaney@avtrex.com>

    * config/mips/mips.h (ISA_HAS_SYNCI): New target capability
    predicate.
    (INITIALIZE_TRAMPOLINE): Emit flush_icache instead library call.
    * config/mips/mips.md (UNSPEC_SYNCI_LOOP): New constant
    (UNSPEC_CLEAR_HAZARD): New constant.
    (flush_icache): New expand.
    (synci_loop): New insn.
    (clear_hazard): New insn.
    * testsuite/gcc.target/mips/flush-icache-2.c: New test.
    * testsuite/gcc.target/mips/flush-icache-1.c: New test.
    * testsuite/gcc.target/mips/flush-icache-3.c: New test.


[-- Attachment #2: flush-cache2.diff --]
[-- Type: text/x-patch, Size: 5846 bytes --]

Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h	(revision 125997)
+++ config/mips/mips.h	(working copy)
@@ -770,6 +770,10 @@ extern const struct mips_rtx_cost_data *
 				 || ISA_MIPS32R2			\
 				 || ISA_MIPS64				\
 				 || TARGET_MIPS5500)
+
+/* ISA includes synci, jr.hb and jalr.hb */
+#define ISA_HAS_SYNCI ISA_MIPS32R2
+
 \f
 /* Add -G xx support.  */
 
@@ -2122,15 +2126,7 @@ typedef struct mips_args {
   chain_addr = plus_constant (func_addr, GET_MODE_SIZE (ptr_mode));	    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, func_addr), FUNC);		    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, chain_addr), CHAIN);		    \
-									    \
-  /* Flush both caches.  We need to flush the data cache in case	    \
-     the system has a write-back cache.  */				    \
-  /* ??? Should check the return value for errors.  */			    \
-  if (mips_cache_flush_func && mips_cache_flush_func[0])		    \
-    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),   \
-		       0, VOIDmode, 3, ADDR, Pmode,			    \
-		       GEN_INT (TRAMPOLINE_SIZE), TYPE_MODE (integer_type_node),\
-		       GEN_INT (3), TYPE_MODE (integer_type_node));	    \
+  emit_insn (gen_flush_icache (copy_rtx (ADDR), GEN_INT (TRAMPOLINE_SIZE))); \
 }
 \f
 /* Addressing modes, and classification of registers for them.  */
Index: config/mips/mips.md
===================================================================
--- config/mips/mips.md	(revision 125997)
+++ config/mips/mips.md	(working copy)
@@ -50,6 +50,8 @@ (define_constants
    (UNSPEC_TLS_GET_TP		28)
    (UNSPEC_MFHC1		31)
    (UNSPEC_MTHC1		32)
+   (UNSPEC_SYNCI_LOOP		33)
+   (UNSPEC_CLEAR_HAZARD		34)
 
    (UNSPEC_ADDRESS_FIRST	100)
 
@@ -4171,6 +4173,72 @@ (define_insn "cprestore"
 }
   [(set_attr "type" "store")
    (set_attr "length" "4,12")])
+
+(define_expand "flush_icache"
+  [(match_operand:SI 0 "general_operand" "r")
+   (match_operand:SI 1 "general_operand" "r")]
+  ""
+  "
+{
+  if (ISA_HAS_SYNCI)
+    {
+      emit_insn (gen_synci_loop (copy_rtx (operands[0]),
+                                           copy_rtx (operands[1])));
+      emit_insn (gen_clear_hazard ());
+    }
+  else
+    /* Flush both caches.  We need to flush the data cache in case
+       the system has a write-back cache.  */
+    /* ??? Should check the return value for errors.  */
+    if (mips_cache_flush_func && mips_cache_flush_func[0])
+      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
+                         0, VOIDmode, 3, copy_rtx (operands[0]), Pmode,
+                         copy_rtx (operands[1]), TYPE_MODE (integer_type_node),
+                         GEN_INT (3), TYPE_MODE (integer_type_node));
+  DONE;
+}")
+
+(define_insn "synci_loop"
+  [(unspec_volatile[(match_operand:SI 0 "general_operand" "r")
+                    (match_operand:SI 1 "general_operand" "r")
+                    (clobber (match_scratch:SI 2 "=0"))
+                    (clobber (match_scratch:SI 3 "=1"))
+                    (clobber (match_scratch:SI 4 "=r"))
+		    (clobber (match_scratch:SI 5 "=r"))]
+                    UNSPEC_SYNCI_LOOP)]
+  "ISA_HAS_SYNCI"
+{
+  return ".set\tpush\n"
+         "\t.set\tnoreorder\n"
+         "\t.set\tnomacro\n"
+         "\taddu\t%3,%0,%1\n"
+         "\trdhwr\t%4,$1\n"
+         "1:\tsynci\t0(%2)\n"
+         "\tsltu\t%5,%2,%3\n"
+         "\tbne\t%5,$0,1b\n"
+         "\taddu\t%2,%2,%4\n"
+         "\tsync\n"
+         "\t.set\tpop";
+ }
+  [(set_attr "length" "28")])
+
+(define_insn "clear_hazard"
+  [(unspec_volatile [(clobber (match_scratch:SI 0 "=r"))] UNSPEC_CLEAR_HAZARD)
+   (clobber (reg:SI 31))]
+  "ISA_HAS_SYNCI"
+{
+  return ".set\tpush\n"
+         "\t.set\tnoreorder\n"
+         "\t.set\tnomacro\n"
+         "\tbal\t1f\n"
+         "\tnop\n"
+         "1:\taddiu\t%0,$31,12\n"
+         "\tjr.hb\t%0\n"
+         "\tnop\n"
+         "\t.set\tpop";
+}
+  [(set_attr "length" "20")])
+
 \f
 ;; Block moves, see mips.c for more details.
 ;; Argument 0 is the destination
Index: testsuite/gcc.target/mips/flush-icache-2.c
===================================================================
--- testsuite/gcc.target/mips/flush-icache-2.c	(revision 0)
+++ testsuite/gcc.target/mips/flush-icache-2.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32" } */
+/* { dg-final { scan-assembler-not "synci" } } */
+/* { dg-final { scan-assembler-not "jr.hb" } } */
+/* { dg-final { scan-assembler "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin_flush_icache(memory, size);
+}
+
Index: testsuite/gcc.target/mips/flush-icache-1.c
===================================================================
--- testsuite/gcc.target/mips/flush-icache-1.c	(revision 0)
+++ testsuite/gcc.target/mips/flush-icache-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32r2" } */
+/* { dg-final { scan-assembler "synci" } } */
+/* { dg-final { scan-assembler "jr.hb" } } */
+/* { dg-final { scan-assembler-not "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin_flush_icache(memory, size);
+}
+
Index: testsuite/gcc.target/mips/flush-icache-3.c
===================================================================
--- testsuite/gcc.target/mips/flush-icache-3.c	(revision 0)
+++ testsuite/gcc.target/mips/flush-icache-3.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32r2" } */
+/* { dg-final { scan-assembler-not "synci" } } */
+/* { dg-final { scan-assembler-not "jr.hb" } } */
+/* { dg-final { scan-assembler-not "_flush_cache" } } */
+
+void f()
+{
+  char *memory = __builtin_alloca(40);
+  __builtin_flush_icache(memory, 0);
+}
+

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache.
  2007-07-01  5:01 [Patch] 1/3 Add new builtin __builtin_flush_icache() David Daney
  2007-07-01  5:05 ` [Patch] 2/3 MIPS support for " David Daney
@ 2007-07-01  5:11 ` David Daney
  2007-07-05  8:34   ` David Daney
  2007-07-01  8:01 ` [Patch] 1/3 Add new builtin __builtin_flush_icache() Paolo Bonzini
  2007-07-05  9:05 ` [Patch] 4/3 i386 target support for __builtin___clear_cache() David Daney
  3 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-01  5:11 UTC (permalink / raw)
  To: gcc-patches, java-patches

[-- Attachment #1: Type: text/plain, Size: 847 bytes --]

This is the third and final part of the __builtin_flush_icache patch.
It replaces the hard coded library call to cacheflush() in the MIPS
ffi_prep_closure_loc with a call to __builtin_flush_icache();

For targets that support user-space i-cache flushing (mips32r2), this
will eliminate a system call from the ffi closure preparation path.
On all other targets the generated code is the same as before.

Bootstrapped and tested on x86_64-unknown-linux-gnu all default
languages with no regressions.

Also tested on:
 x86_64 cross to mipsel-linux --with-arch=mips32
 i686   cross to mipsel-linux --with-arch=mips32r2

with no regressions.

OK to commit?

libffi/:
2007-06-30  David Daney  <ddaney@avtrex.com>

    * src/mips/ffi.c: Don't include sys/cachectl.h.
    (ffi_prep_closure_loc): Use __builtin_flush_icache() instead of
    cacheflush().


[-- Attachment #2: flush-cache3.diff --]
[-- Type: text/x-patch, Size: 617 bytes --]

Index: src/mips/ffi.c
===================================================================
--- src/mips/ffi.c	(revision 125997)
+++ src/mips/ffi.c	(working copy)
@@ -27,7 +27,6 @@
 #include <ffi_common.h>
 
 #include <stdlib.h>
-#include <sys/cachectl.h>
 
 #if _MIPS_SIM == _ABIN32
 #define FIX_ARGP \
@@ -525,8 +524,7 @@ ffi_prep_closure_loc (ffi_closure *closu
   closure->fun = fun;
   closure->user_data = user_data;
 
-  /* XXX this is available on Linux, but anything else? */
-  cacheflush (codeloc, FFI_TRAMPOLINE_SIZE, ICACHE);
+  __builtin_flush_icache(codeloc, FFI_TRAMPOLINE_SIZE);
 
   return FFI_OK;
 }

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-01  5:01 [Patch] 1/3 Add new builtin __builtin_flush_icache() David Daney
  2007-07-01  5:05 ` [Patch] 2/3 MIPS support for " David Daney
  2007-07-01  5:11 ` [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache David Daney
@ 2007-07-01  8:01 ` Paolo Bonzini
  2007-07-01 17:51   ` Thiemo Seufer
  2007-07-01 18:47   ` David Daney
  2007-07-05  9:05 ` [Patch] 4/3 i386 target support for __builtin___clear_cache() David Daney
  3 siblings, 2 replies; 34+ messages in thread
From: Paolo Bonzini @ 2007-07-01  8:01 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches


> The prototype for the new built-in is:
> 
> void __builtin_flush_icache(void *location, size_t len);
> 
> As one might expect, it flushes the region of memory pointed to by
> location with length len from the instruction cache.  The
> new builtin and the flush_icache insn would be the preferred mechanism
> to flushing the i-cache.

libgcc already exports __clear_cache with this prototype:

void
__clear_cache (char *beg __attribute__((__unused__)),
                char *end __attribute__((__unused__)))

Paolo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-01  5:05 ` [Patch] 2/3 MIPS support for " David Daney
@ 2007-07-01 10:56   ` Richard Sandiford
  2007-07-05  7:50     ` David Daney
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Sandiford @ 2007-07-01 10:56 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches

David Daney <ddaney@avtrex.com> writes:
> This is the second part of the __builtin_flush_icache patch.  It adds
> support for MIPS.

Thanks for doing this.

> @@ -4171,6 +4173,72 @@ (define_insn "cprestore"
>  }
>    [(set_attr "type" "store")
>     (set_attr "length" "4,12")])
> +
> +(define_expand "flush_icache"
> +  [(match_operand:SI 0 "general_operand" "r")
> +   (match_operand:SI 1 "general_operand" "r")]

No constraints in a define_expand; just remove the "r" strings altogether.

The expander ought to be able to require something more specific than
general_operand here.  I suppose that'll need some changes to the
target-independent part of the patch.  It will also mean changing
the above to:

  [(match_operand 0 "pmode_register_operand")
   (match_operand 1 "pmode_register_operand")]

since this pattern is used in situations where Pmode != SImode.

> +  if (ISA_HAS_SYNCI)
> +    {
> +      emit_insn (gen_synci_loop (copy_rtx (operands[0]),
> +                                           copy_rtx (operands[1])));
> +      emit_insn (gen_clear_hazard ());

Odd indentation.  No need to call copy_rtx here; the operands are only
used this once.

> +    /* Flush both caches.  We need to flush the data cache in case
> +       the system has a write-back cache.  */
> +    /* ??? Should check the return value for errors.  */
> +    if (mips_cache_flush_func && mips_cache_flush_func[0])
> +      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
> +                         0, VOIDmode, 3, copy_rtx (operands[0]), Pmode,
> +                         copy_rtx (operands[1]), TYPE_MODE (integer_type_node),
> +                         GEN_INT (3), TYPE_MODE (integer_type_node));
> +  DONE;

Same copy_rtx comment here.  TBH, I'm not sure what the "???" comment
refers to.

> +(define_insn "synci_loop"
> +  [(unspec_volatile[(match_operand:SI 0 "general_operand" "r")
> +                    (match_operand:SI 1 "general_operand" "r")
> +                    (clobber (match_scratch:SI 2 "=0"))
> +                    (clobber (match_scratch:SI 3 "=1"))
> +                    (clobber (match_scratch:SI 4 "=r"))
> +		    (clobber (match_scratch:SI 5 "=r"))]
> +                    UNSPEC_SYNCI_LOOP)]

Is there any particular need to force operand 2 to be the same as
operand 0, and operand 3 to be the same as operand 1?  Since these
clobbers aren't earlyclobbers, I would have expected the register
allocators to be able to reuse operands 0 and 1 as scratch registers
if it thought that doing so was useful.  _Forcing_ it to reuse them
seems unnecessarily restrictive.

Minor nit, but please use "d" rather than "r", for consistency with
other mips.md patterns.  I realise this pattern will never be used for
MIPS16, but even so.

> +  "ISA_HAS_SYNCI"
> +{
> +  return ".set\tpush\n"
> +         "\t.set\tnoreorder\n"
> +         "\t.set\tnomacro\n"
> +         "\taddu\t%3,%0,%1\n"
> +         "\trdhwr\t%4,$1\n"
> +         "1:\tsynci\t0(%2)\n"
> +         "\tsltu\t%5,%2,%3\n"
> +         "\tbne\t%5,$0,1b\n"
> +         "\taddu\t%2,%2,%4\n"
> +         "\tsync\n"
> +         "\t.set\tpop";
> + }
> +  [(set_attr "length" "28")])

Since it's a fixed string, you could replace the "{ ... }" C code with:

  ".set\tpush\;
   .set\tnoreorder\;
   ..."

I'm not sure if that's easier to read or not.

However, we're eventually going to want this for MIPS64 too, and while
we could use mode macros to parameterise it, I wonder if would be better
to simply expand this as rtl, with special patterns for the "rdhwr",
"synci" and "sync" instructions.  This is similar to what we do for
other largish patterns.  That's certainly not a requirement though;
I'm not even sure it makes sense.  It's just an idea-cum-question.

> +(define_insn "clear_hazard"
> +  [(unspec_volatile [(clobber (match_scratch:SI 0 "=r"))] UNSPEC_CLEAR_HAZARD)
> +   (clobber (reg:SI 31))]

Clobbers aren't expected inside unspec_volatiles.  I think this should be:

  [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
   (clobber (match_scratch:SI 0 "=d"))
   (clobber (reg:SI 31))]

(where "(const_int 0)" is the traditional "I don't take any arguments" hack)

> +  "ISA_HAS_SYNCI"
> +{
> +  return ".set\tpush\n"
> +         "\t.set\tnoreorder\n"
> +         "\t.set\tnomacro\n"
> +         "\tbal\t1f\n"
> +         "\tnop\n"
> +         "1:\taddiu\t%0,$31,12\n"
> +         "\tjr.hb\t%0\n"
> +         "\tnop\n"
> +         "\t.set\tpop";
> +}

Following on from your comment about delay slots, it also strikes me
that the "bal; nop; addiu" sequence is almost always longer than the
sequence to load a local address into a register.  I think the only
exception is static n64 with -mno-sym32.  I wonder if it would be
worth using a sequence like:

    rtx label, target, insn;

    label = gen_label_rtx ();
    target = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, label));
    insn = emit_jump_insn (gen_indirect_jump_hb (target));
    REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (insn));
    emit_label (label);

where indirect_jump_hb would just be something like:

(define_insn "indirect_jump_hb"
  [(set (pc) (match_operand "pmode_register_operand" "d"))
   (unspec_volatile [(const_int 0)] UNSPEC_JR_HB)]
  "ISA_HAS_SYNCI"
  "jr.hb\t%0"
  [(set_attr "type" "jump")])

(all untested).  I think we might then be able to fill the delay slot
from the target of the jump, which should be OK in this context.
Again, this is just an idea-cum-question.

The tests looked good, thanks.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-01  8:01 ` [Patch] 1/3 Add new builtin __builtin_flush_icache() Paolo Bonzini
@ 2007-07-01 17:51   ` Thiemo Seufer
  2007-07-01 18:47   ` David Daney
  1 sibling, 0 replies; 34+ messages in thread
From: Thiemo Seufer @ 2007-07-01 17:51 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: David Daney, gcc-patches

Paolo Bonzini wrote:
>
>> The prototype for the new built-in is:
>> void __builtin_flush_icache(void *location, size_t len);
>> As one might expect, it flushes the region of memory pointed to by
>> location with length len from the instruction cache.  The
>> new builtin and the flush_icache insn would be the preferred mechanism
>> to flushing the i-cache.
>
> libgcc already exports __clear_cache with this prototype:
>
> void
> __clear_cache (char *beg __attribute__((__unused__)),
>                char *end __attribute__((__unused__)))

On MIPS{32,64}R2 an icache flush can be done inline with synci instructions.
Older revisions need a syscall.


Thiemo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-01  8:01 ` [Patch] 1/3 Add new builtin __builtin_flush_icache() Paolo Bonzini
  2007-07-01 17:51   ` Thiemo Seufer
@ 2007-07-01 18:47   ` David Daney
  2007-07-02  5:04     ` Paolo Bonzini
  1 sibling, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-01 18:47 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gcc-patches

Paolo Bonzini wrote:
>
>> The prototype for the new built-in is:
>>
>> void __builtin_flush_icache(void *location, size_t len);
>>
>> As one might expect, it flushes the region of memory pointed to by
>> location with length len from the instruction cache.  The
>> new builtin and the flush_icache insn would be the preferred mechanism
>> to flushing the i-cache.
>
> libgcc already exports __clear_cache with this prototype:
>
> void
> __clear_cache (char *beg __attribute__((__unused__)),
>                char *end __attribute__((__unused__)))
I am aware of this function.  What is your suggestion?

Should we:

1) Rename the new builtin to __builtin_clear_cache (char *beg ,  char 
*end), and have it call the function in libgcc if there is not an inline 
expansion for it?

2) Keep the name an prototype of my original patch and call 
__clear_cache if there is not an inline expansion for it?

3) Something else entirely?

It would seem that arm and m68k are the only targets that use 
__clear_cache() from libgcc.

David Daney

David Daney

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-01 18:47   ` David Daney
@ 2007-07-02  5:04     ` Paolo Bonzini
  2007-07-03 23:55       ` Mark Mitchell
  0 siblings, 1 reply; 34+ messages in thread
From: Paolo Bonzini @ 2007-07-02  5:04 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches


> 1) Rename the new builtin to __builtin_clear_cache (char *beg ,  char 
> *end), and have it call the function in libgcc if there is not an inline 
> expansion for it?

(And use libgcc's prototype).  This would be the cleanest.

This, plus, try to see how __clear_cache could use the definition of the 
builtin.

Paolo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-02  5:04     ` Paolo Bonzini
@ 2007-07-03 23:55       ` Mark Mitchell
  2007-07-04  7:18         ` Paolo Bonzini
  2007-07-04 17:10         ` David Daney
  0 siblings, 2 replies; 34+ messages in thread
From: Mark Mitchell @ 2007-07-03 23:55 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: David Daney, gcc-patches

Paolo Bonzini wrote:
> 
>> 1) Rename the new builtin to __builtin_clear_cache (char *beg ,  char
>> *end), and have it call the function in libgcc if there is not an
>> inline expansion for it?
> 
> (And use libgcc's prototype).  This would be the cleanest.
> 
> This, plus, try to see how __clear_cache could use the definition of the
> builtin.

Paolo, I didn't quite follow your suggestion.  So, I'm not sure if I'm
agreeing or disagreeing. :-)

Anyhow, here's my suggestion.

1. The builtin should be:

  __builtin___clear_cache (char *beg, char *end)

following the standard convention that the builtin for X is __builtin_X.

2. The default implementation of the builtin should be to call
__clear_cache.  That means that users can call __builtin_clear_cache
from their code, on all platforms.

3. Targets which can do something clever can implement the builtin as
they please and can define CLEAR_INSN_CACHE(BEG, END) to expand to
__builtin_clear_cache(BEG, END).  On such targets, then, __clear_cache
in libgcc will call the builtin, which we can be assured will not
recursively expand to __clear_cache.

Thanks,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-03 23:55       ` Mark Mitchell
@ 2007-07-04  7:18         ` Paolo Bonzini
  2007-07-04 17:17           ` Mark Mitchell
  2007-07-04 17:10         ` David Daney
  1 sibling, 1 reply; 34+ messages in thread
From: Paolo Bonzini @ 2007-07-04  7:18 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Paolo Bonzini, David Daney, gcc-patches


> 1. The builtin should be:
> 
>   __builtin___clear_cache (char *beg, char *end)
> 
> following the standard convention that the builtin for X is __builtin_X.

... but you misspelled it twice later in the message. ;-)

I think your suggestion is very good.

Paolo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-03 23:55       ` Mark Mitchell
  2007-07-04  7:18         ` Paolo Bonzini
@ 2007-07-04 17:10         ` David Daney
  2007-07-04 17:51           ` Mark Mitchell
  1 sibling, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-04 17:10 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Paolo Bonzini, gcc-patches

Mark Mitchell wrote:
> Paolo Bonzini wrote:
>   
>>> 1) Rename the new builtin to __builtin_clear_cache (char *beg ,  char
>>> *end), and have it call the function in libgcc if there is not an
>>> inline expansion for it?
>>>       
>> (And use libgcc's prototype).  This would be the cleanest.
>>
>> This, plus, try to see how __clear_cache could use the definition of the
>> builtin.
>>     
>
> Paolo, I didn't quite follow your suggestion.  So, I'm not sure if I'm
> agreeing or disagreeing. :-)
>
> Anyhow, here's my suggestion.
>
> 1. The builtin should be:
>
>   __builtin___clear_cache (char *beg, char *end)
>
> following the standard convention that the builtin for X is __builtin_X.
>
> 2. The default implementation of the builtin should be to call
> __clear_cache.  That means that users can call __builtin_clear_cache
> from their code, on all platforms.
>
>   
I agree and had arrived at the same conclusion for your first two points.
> 3. Targets which can do something clever can implement the builtin as
> they please and can define CLEAR_INSN_CACHE(BEG, END) to expand to
> __builtin_clear_cache(BEG, END).  On such targets, then, __clear_cache
> in libgcc will call the builtin, which we can be assured will not
> recursively expand to __clear_cache.
>
>
>   
My current idea for this can, I think,,be best expressed by looking at 
the proposed code for libgcc:

void
__clear_cache (char *beg, char *end)
{
  if (__builtin_clear_cache_inline_p())
    __builtin___clear_cache (beg, end);
  else
    {
#ifdef CLEAR_INSN_CACHE
      CLEAR_INSN_CACHE (beg, end);
#endif /* CLEAR_INSN_CACHE */
    }
}

No changes to any existing  CLEAR_INSN_CACHE definitions are needed.  
There is a new target hook that determines the value of 
__builtin_clear_cache_inline_p().  I chose to do it this way because I 
think it would be impossible to reliably determine if the 
__builtin___clear_cache would expand in-line without such a predicate.

 A revised patch is in the works.

Thanks,
David Daney

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-04  7:18         ` Paolo Bonzini
@ 2007-07-04 17:17           ` Mark Mitchell
  0 siblings, 0 replies; 34+ messages in thread
From: Mark Mitchell @ 2007-07-04 17:17 UTC (permalink / raw)
  To: bonzini; +Cc: David Daney, gcc-patches

Paolo Bonzini wrote:
> 
>> 1. The builtin should be:
>>
>>   __builtin___clear_cache (char *beg, char *end)
>>
>> following the standard convention that the builtin for X is __builtin_X.
> 
> ... but you misspelled it twice later in the message. ;-)

Doh!  I meant the spelling that you quote above, and David seems to
agree, so I think we're all on the same page.

Thanks,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-04 17:10         ` David Daney
@ 2007-07-04 17:51           ` Mark Mitchell
  2007-07-05  7:36             ` David Daney
  0 siblings, 1 reply; 34+ messages in thread
From: Mark Mitchell @ 2007-07-04 17:51 UTC (permalink / raw)
  To: David Daney; +Cc: Paolo Bonzini, gcc-patches

David Daney wrote:

> void
> __clear_cache (char *beg, char *end)
> {
>  if (__builtin_clear_cache_inline_p())
>    __builtin___clear_cache (beg, end);
>  else
>    {
> #ifdef CLEAR_INSN_CACHE
>      CLEAR_INSN_CACHE (beg, end);
> #endif /* CLEAR_INSN_CACHE */
>    }
> }
> 
> No changes to any existing  CLEAR_INSN_CACHE definitions are needed. 
> There is a new target hook that determines the value of
> __builtin_clear_cache_inline_p().

I think that's a little bit overly complicated.  Since the target has to
do something anyhow, to indicate that we should call the builtin here,
we might as well just have the target define CLEAR_INSN_CACHE that way.

My two cents,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-04 17:51           ` Mark Mitchell
@ 2007-07-05  7:36             ` David Daney
  2007-07-05 17:59               ` Richard Sandiford
  2007-07-06  6:07               ` Mark Mitchell
  0 siblings, 2 replies; 34+ messages in thread
From: David Daney @ 2007-07-05  7:36 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Paolo Bonzini, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2279 bytes --]

Mark Mitchell wrote:
> David Daney wrote:
>
>   
>> void
>> __clear_cache (char *beg, char *end)
>> {
>>  if (__builtin_clear_cache_inline_p())
>>    __builtin___clear_cache (beg, end);
>>  else
>>    {
>> #ifdef CLEAR_INSN_CACHE
>>      CLEAR_INSN_CACHE (beg, end);
>> #endif /* CLEAR_INSN_CACHE */
>>    }
>> }
>>
>> No changes to any existing  CLEAR_INSN_CACHE definitions are needed. 
>> There is a new target hook that determines the value of
>> __builtin_clear_cache_inline_p().
>>     
>
> I think that's a little bit overly complicated.  Since the target has to
> do something anyhow, to indicate that we should call the builtin here,
> we might as well just have the target define CLEAR_INSN_CACHE that way.
>
> My two cents,
>   

Ok, I think I have come around to your way of thinking.  I still have 
the new predicate __builtin_clear_cache_inline_p() for use in target 
CLEAR_INSN_CACHE definitions, but I leave __clear_cache unchanged.

Here is a new version of the target independent portion of the patch.  
Thanks to Mark, Paolo, and Richard for their input.

Currently bootstrapping/testing on x86_64-unknown-linux-gnu, 
mipsel-unknown-linux-gnu (--with-arch=[mips32|mips32r2]).

OK to commit?

2007-07-04  David Daney  <ddaney@avtrex.com>

    * builtins.def (BUILT_IN_CLEAR_CACHE): New builtin.
    (BUILT_IN_CLEAR_CACHE_INLINE_P): Same.
    * builtins.c (expand_builtin_clear_cache): New function.
    (expand_builtin): Call expand_builtin_clear_cache for
    BUILT_IN_CLEAR_CACHE case.  Call gcc_unreachable() for
    BUILT_IN_CLEAR_CACHE_INLINE_P case.
    (fold_builtin_clear_cache_inline_p): New function.
    (fold_builtin_0): Call fold_builtin_clear_cache_inline_p for
    BUILT_IN_CLEAR_CACHE_INLINE_P case.
    * target.h (struct gcc_target):  Add new member
    builtin_clear_cache_inline_p.
    * target-def.h (TARGET_BUILTIN_CLEAR_CACHE_INLINE_P):  New target
    hooks initializer.
    (TARGET_INITIALIZER): Add TARGET_BUILTIN_CLEAR_CACHE_INLINE_P.
    * doc/extend.texi (__builtin___clear_cache): Document new builtin.
    (__builtin_clear_cache_inline_p): Same.
    * doc/tm.texi (TARGET_BUILTIN_CLEAR_CACHE_INLINE_P): Document new
    target hook.
    * testsuite/gcc.dg/builtins-64.c: New test.
    * testsuite/gcc.dg/builtins-65.c: New test.


[-- Attachment #2: flush-cache1.diff --]
[-- Type: text/x-patch, Size: 8662 bytes --]

Index: builtins.def
===================================================================
--- builtins.def	(revision 125997)
+++ builtins.def	(working copy)
@@ -1,6 +1,6 @@
 /* This file contains the definitions and documentation for the
    builtins used in the GNU compiler.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -609,6 +609,8 @@ DEF_GCC_BUILTIN        (BUILT_IN_APPLY_A
 DEF_GCC_BUILTIN        (BUILT_IN_ARGS_INFO, "args_info", BT_FN_INT_INT, ATTR_NULL)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LIST)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_CLEAR_CACHE_INLINE_P, "clear_cache_inline_p", BT_FN_BOOL, ATTR_NOTHROW_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_CLASSIFY_TYPE, "classify_type", BT_FN_INT_VAR, ATTR_NULL)
 DEF_GCC_BUILTIN        (BUILT_IN_CLZ, "clz", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LIST)
Index: builtins.c
===================================================================
--- builtins.c	(revision 125997)
+++ builtins.c	(working copy)
@@ -5513,6 +5513,44 @@ expand_builtin_profile_func (bool exitp)
   return const0_rtx;
 }
 
+/* Expand a call to _builtin___clear_cache.  If the target does not
+   support the operation, return NULL_RTX so it expands as a call. */
+static rtx
+expand_builtin_clear_cache (tree exp)
+{
+  tree begin, end;
+  rtx begin_rtx, end_rtx;
+
+  if (!targetm.builtin_clear_cache_inline_p())
+    return NULL_RTX;
+
+  /* We must not expand to a library call if
+     __builtin_clear_cache_inline_p() returns true.  If we did, the
+     fallback library function in libgcc would expand to a call to
+     _builtin___clear_cache() and we would recurse infinitely.  */
+  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
+    {
+      error ("both arguments to %<_builtin___clear_cache%> must be pointers");
+      return const0_rtx;
+    }
+
+  begin = CALL_EXPR_ARG (exp, 0);
+  end = CALL_EXPR_ARG (exp, 1);
+
+  /* Evaluate BEGIN and END. */
+  begin_rtx = expand_expr (begin, NULL_RTX, Pmode, EXPAND_NORMAL);
+  begin_rtx = convert_memory_address (Pmode, begin_rtx);
+
+  end_rtx = expand_expr (end, NULL_RTX, Pmode, EXPAND_NORMAL);
+  end_rtx = convert_memory_address (Pmode, end_rtx);
+
+#ifdef HAVE_clear_cache
+  if (HAVE_clear_cache)
+    emit_insn (gen_clear_cache (begin_rtx, end_rtx));
+#endif
+  return const0_rtx;
+}
+
 /* Given a trampoline address, make sure it satisfies TRAMPOLINE_ALIGNMENT.  */
 
 static rtx
@@ -6181,6 +6219,16 @@ expand_builtin (tree exp, rtx target, rt
 	return const0_rtx;
       return expand_builtin_next_arg ();
 
+    case BUILT_IN_CLEAR_CACHE:
+      target = expand_builtin_clear_cache (exp);
+      if (target)
+        return target;
+      break;
+
+    case BUILT_IN_CLEAR_CACHE_INLINE_P:
+      /* This gets folded away.  We should never get here. */
+      gcc_unreachable();
+
     case BUILT_IN_CLASSIFY_TYPE:
       return expand_builtin_classify_type (exp);
 
@@ -9558,6 +9606,14 @@ fold_builtin_unordered_cmp (tree fndecl,
 		      fold_build2 (code, type, arg0, arg1));
 }
 
+/* Fold function call to builtin clear_cache_inline_p. */
+static tree
+fold_builtin_clear_cache_inline_p (tree type)
+{
+  int v = targetm.builtin_clear_cache_inline_p() ? 1 : 0;
+  return constant_boolean_node (v, type);
+}
+
 /* Fold a call to built-in function FNDECL with 0 arguments.
    IGNORE is true if the result of the function call is ignored.  This
    function returns NULL_TREE if no simplification was possible.  */
@@ -9581,6 +9637,9 @@ fold_builtin_0 (tree fndecl, bool ignore
     case BUILT_IN_CLASSIFY_TYPE:
       return fold_builtin_classify_type (NULL_TREE);
 
+    case BUILT_IN_CLEAR_CACHE_INLINE_P:
+      return fold_builtin_clear_cache_inline_p (type);
+
     default:
       break;
     }
Index: target.h
===================================================================
--- target.h	(revision 125997)
+++ target.h	(working copy)
@@ -855,6 +855,9 @@ struct gcc_target
      bits in the bitmap passed in. */  
   void (*live_on_entry) (bitmap); 
 
+  /* Returns true if __builtin_clear_cache will expand in-line. */
+  bool (*builtin_clear_cache_inline_p) (void);
+
   /* True if unwinding tables should be generated by default.  */
   bool unwind_tables_default;
 
Index: target-def.h
===================================================================
--- target-def.h	(revision 125997)
+++ target-def.h	(working copy)
@@ -649,6 +649,10 @@ Foundation, 51 Franklin Street, Fifth Fl
     TARGET_CXX_ADJUST_CLASS_AT_DEFINITION	\
   }
 
+#ifndef TARGET_BUILTIN_CLEAR_CACHE_INLINE_P
+#define TARGET_BUILTIN_CLEAR_CACHE_INLINE_P hook_bool_void_false
+#endif
+
 /* The whole shebang.  */
 #define TARGET_INITIALIZER			\
 {						\
@@ -731,6 +735,7 @@ Foundation, 51 Franklin Street, Fifth Fl
   TARGET_SECONDARY_RELOAD,			\
   TARGET_CXX,					\
   TARGET_EXTRA_LIVE_ON_ENTRY,                    \
+  TARGET_BUILTIN_CLEAR_CACHE_INLINE_P,		\
   TARGET_UNWIND_TABLES_DEFAULT,			\
   TARGET_HAVE_NAMED_SECTIONS,			\
   TARGET_HAVE_SWITCHABLE_BSS_SECTIONS,		\
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 125997)
+++ doc/extend.texi	(working copy)
@@ -6086,6 +6086,23 @@ if (__builtin_expect (ptr != NULL, 1))
 when testing pointer or floating-point values.
 @end deftypefn
 
+@deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
+This functions is used to flush the processor's instruction cache for
+the region of memory between @var{begin} and @var{end}.  Some
+targets require that the instruction cache be flushed, after modifying
+memory containing code, in order to obtain deterministic behavior.
+
+If the target does not require instruction cache flushes,
+@code{__builtin___clear_cache} has no effect.  Otherwise either
+instructions are emitted in-line to clear the instruction cache or a
+call to the @code{__clear_cache} function in libgcc is made.
+@end deftypefn
+
+@deftypefn {Built-in Function} bool __builtin_clear_cache_inline_p (void)
+This function returns @code{false} if @code{__builtin___clear_cache}
+will result in a call to the @code{__clear_cache} function in libgcc.
+@end deftypefn
+
 @deftypefn {Built-in Function} void __builtin_prefetch (const void *@var{addr}, ...)
 This function is used to minimize cache-miss latency by moving data into
 a cache before it is accessed.
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 125997)
+++ doc/tm.texi	(working copy)
@@ -10093,6 +10093,15 @@ to have to make special provisions in @c
 to reserve space for caller-saved target registers.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_BUILTIN_CLEAR_CACHE_INLINE_P (void)
+This target hook returns @code{true} if the built-in function
+@code{_builtin___clear_cache} (char * @var{begin}, char * @var{end})
+will be expanded in-line.  Otherwise if @code{false} is returned, the
+built-in will expand to a call to the library function
+@code{__clear_cache} (char * @var{begin}, char * @var{end}).  By
+default, the hook returns @code{false}.
+@end deftypefn
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
Index: testsuite/gcc.dg/builtins-64.c
===================================================================
--- testsuite/gcc.dg/builtins-64.c	(revision 0)
+++ testsuite/gcc.dg/builtins-64.c	(revision 0)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+int f()
+{
+  /* We must fold away __builtin_clear_cache_inline_p. */
+  return __builtin_clear_cache_inline_p();
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_clear_cache_inline_p" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: testsuite/gcc.dg/builtins-65.c
===================================================================
--- testsuite/gcc.dg/builtins-65.c	(revision 0)
+++ testsuite/gcc.dg/builtins-65.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+
+int main ()
+{
+  char *mem = __builtin_alloca (40);
+  __builtin___clear_cache (mem, mem + 40);
+  return 0;
+}

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-01 10:56   ` Richard Sandiford
@ 2007-07-05  7:50     ` David Daney
  2007-07-05 19:05       ` Richard Sandiford
  0 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-05  7:50 UTC (permalink / raw)
  To: David Daney, gcc-patches, rsandifo

[-- Attachment #1: Type: text/plain, Size: 7305 bytes --]

Richard Sandiford wrote:
> David Daney <ddaney@avtrex.com> writes:
>   
>> @@ -4171,6 +4173,72 @@ (define_insn "cprestore"
>>  }
>>    [(set_attr "type" "store")
>>     (set_attr "length" "4,12")])
>> +
>> +(define_expand "flush_icache"
>> +  [(match_operand:SI 0 "general_operand" "r")
>> +   (match_operand:SI 1 "general_operand" "r")]
>>     
>
> No constraints in a define_expand; just remove the "r" strings altogether.
>
>   
Fixed.

>> +  if (ISA_HAS_SYNCI)
>> +    {
>> +      emit_insn (gen_synci_loop (copy_rtx (operands[0]),
>> +                                           copy_rtx (operands[1])));
>> +      emit_insn (gen_clear_hazard ());
>>     
>
> Odd indentation.  No need to call copy_rtx here; the operands are only
> used this once.
>
>   
Right.  I guess I am a little unclear on when copy_rtx is required.

>> +    /* Flush both caches.  We need to flush the data cache in case
>> +       the system has a write-back cache.  */
>> +    /* ??? Should check the return value for errors.  */
>> +    if (mips_cache_flush_func && mips_cache_flush_func[0])
>> +      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
>> +                         0, VOIDmode, 3, copy_rtx (operands[0]), Pmode,
>> +                         copy_rtx (operands[1]), TYPE_MODE (integer_type_node),
>> +                         GEN_INT (3), TYPE_MODE (integer_type_node));
>> +  DONE;
>>     
>
> Same copy_rtx comment here.  TBH, I'm not sure what the "???" comment
> refers to.
>
>   
I just pased this part in from its old home in mips.h.  The ??? comment 
came with it.  It is gone now.

>> +(define_insn "synci_loop"
>> +  [(unspec_volatile[(match_operand:SI 0 "general_operand" "r")
>> +                    (match_operand:SI 1 "general_operand" "r")
>> +                    (clobber (match_scratch:SI 2 "=0"))
>> +                    (clobber (match_scratch:SI 3 "=1"))
>> +                    (clobber (match_scratch:SI 4 "=r"))
>> +		    (clobber (match_scratch:SI 5 "=r"))]
>> +                    UNSPEC_SYNCI_LOOP)]
>>     
>
> Is there any particular need to force operand 2 to be the same as
> operand 0, and operand 3 to be the same as operand 1?  Since these
> clobbers aren't earlyclobbers, I would have expected the register
> allocators to be able to reuse operands 0 and 1 as scratch registers
> if it thought that doing so was useful.  _Forcing_ it to reuse them
> seems unnecessarily restrictive.
>
> Minor nit, but please use "d" rather than "r", for consistency with
> other mips.md patterns.  I realise this pattern will never be used for
> MIPS16, but even so.
>   
The first one was to force the register to be reused so I didn't have to 
waste an instruction making a copy of it.  The second was gratuitous.  
It is moot though as this part is gone now.


> I wonder if would be better
> to simply expand this as rtl, with special patterns for the "rdhwr",
> "synci" and "sync" instructions.  This is similar to what we do for
> other largish patterns.  That's certainly not a requirement though;
> I'm not even sure it makes sense.  It's just an idea-cum-question.
>
>   
That is what I did.


>> +(define_insn "clear_hazard"
>> +  [(unspec_volatile [(clobber (match_scratch:SI 0 "=r"))] UNSPEC_CLEAR_HAZARD)
>> +   (clobber (reg:SI 31))]
>>     
>
> Clobbers aren't expected inside unspec_volatiles.  I think this should be:
>
>   [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
>    (clobber (match_scratch:SI 0 "=d"))
>    (clobber (reg:SI 31))]
>
> (where "(const_int 0)" is the traditional "I don't take any arguments" hack)
>
>   
Fixed.

>> +  "ISA_HAS_SYNCI"
>> +{
>> +  return ".set\tpush\n"
>> +         "\t.set\tnoreorder\n"
>> +         "\t.set\tnomacro\n"
>> +         "\tbal\t1f\n"
>> +         "\tnop\n"
>> +         "1:\taddiu\t%0,$31,12\n"
>> +         "\tjr.hb\t%0\n"
>> +         "\tnop\n"
>> +         "\t.set\tpop";
>> +}
>>     
>
> Following on from your comment about delay slots, it also strikes me
> that the "bal; nop; addiu" sequence is almost always longer than the
> sequence to load a local address into a register.  I think the only
> exception is static n64 with -mno-sym32.  I wonder if it would be
> worth using a sequence like:
>
>     rtx label, target, insn;
>
>     label = gen_label_rtx ();
>     target = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, label));
>     insn = emit_jump_insn (gen_indirect_jump_hb (target));
>     REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (insn));
>     emit_label (label);
>
> where indirect_jump_hb would just be something like:
>
> (define_insn "indirect_jump_hb"
>   [(set (pc) (match_operand "pmode_register_operand" "d"))
>    (unspec_volatile [(const_int 0)] UNSPEC_JR_HB)]
>   "ISA_HAS_SYNCI"
>   "jr.hb\t%0"
>   [(set_attr "type" "jump")])
>
> (all untested).  I think we might then be able to fill the delay slot
> from the target of the jump, which should be OK in this context.
> Again, this is just an idea-cum-question.
>
>   
I spent way too much time trying to get that to work.  For now I left it 
mostly unchanged from the original patch.

The problem is that the CFG things just don't seem to be able to handle:
1) Jump insns where the body is a (parallel ...) instead of a (set (pc) 
...).
2) Unconditional jumps where the target and fall through edges goto the 
same place.

I kept the call to mips_cache_flush_func pretty much unchanged because I 
didn't want to break compatibility with the command line options that 
allow this to be changed.   Also I added CLEAR_INSN_CACHE that expands 
in libgcc that is used if  __builtin___clear_cache() cannot be expanded 
in-line.  This is slightly inefficient as it adds an extra function call 
layer through libgcc and has to do the arithmetic to convert the 
function parameters, but I am thinking I shouldn't be concerned about a 
few extra cycles doing this when we are about to do a system call.  The 
alternative is to have __builtin___clear_cache() directly expand to 
mips_cache_flush_func and leave libgcc out of the picture for MIPS. 

Attached is the new version of the patch.

Currently bootstrapping/testing on x86_64-unknown-linux-gnu, 
mipsel-unknown-linux-gnu (--with-arch=[mips32|mips32r2]).

OK to commit if the target independent portion is approved?

2007-07-04  David Daney  <ddaney@avtrex.com>

    * config/mips/mips.h (mips_builtin_clear_cache_inline_p): Declare
    function.
    (TARGET_BUILTIN_CLEAR_CACHE_INLINE_P): Define target hook.
    (ISA_HAS_SYNCI): New target capability predicate.
    (CLEAR_INSN_CACHE): Define libgcc2 hook.
    (INITIALIZE_TRAMPOLINE): Emit flush_icache instead library call.
    * config/mips/netbsd.h (CLEAR_INSN_CACHE): Define libgcc2 hook.
    * config/mips/mips-protos.h (mips_expand_synci_loop): Declare
    function.
    * config/mips/mips.md (UNSPEC_CLEAR_HAZARD): New constant.
    (UNSPEC_RDHWR): Same.
    (UNSPEC_SYNCI): Same.
    (UNSPEC_SYNC): Same.
    (flush_icache): New expand.
    (clear_cache): Same.
    (sync): New insn.
    (synci): Same.
    (rdhwr): Same.
    (clear_hazard): Same.
    * config/mips/mips.c (mips_builtin_clear_cache_inline_p): New function.
    (mips_expand_synci_loop): Same.
    * testsuite/gcc.target/mips/clear-cache-1.c: New test.
    * testsuite/gcc.target/mips/clear-cache-2.c: New test.





[-- Attachment #2: flush-cache2.diff --]
[-- Type: text/x-patch, Size: 9970 bytes --]

Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h	(revision 125997)
+++ config/mips/mips.h	(working copy)
@@ -136,6 +136,10 @@ extern const struct mips_cpu_info mips_c
 extern const struct mips_cpu_info *mips_arch_info;
 extern const struct mips_cpu_info *mips_tune_info;
 extern const struct mips_rtx_cost_data *mips_cost;
+
+extern bool mips_builtin_clear_cache_inline_p (void);
+#undef TARGET_BUILTIN_CLEAR_CACHE_INLINE_P
+#define TARGET_BUILTIN_CLEAR_CACHE_INLINE_P mips_builtin_clear_cache_inline_p
 #endif
 
 /* Macros to silence warnings about numbers being signed in traditional
@@ -770,6 +774,10 @@ extern const struct mips_rtx_cost_data *
 				 || ISA_MIPS32R2			\
 				 || ISA_MIPS64				\
 				 || TARGET_MIPS5500)
+
+/* ISA includes synci, jr.hb and jalr.hb */
+#define ISA_HAS_SYNCI ISA_MIPS32R2
+
 \f
 /* Add -G xx support.  */
 
@@ -2108,6 +2116,17 @@ typedef struct mips_args {
 #define CACHE_FLUSH_FUNC "_flush_cache"
 #endif
 
+/* Clear the instruction cache from `beg' to `end'.  */
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(BEG, END)				\
+{								\
+  extern void _flush_cache (char *b, int l, int f);		\
+  if (__builtin_clear_cache_inline_p())				\
+    __builtin___clear_cache ((BEG), (END));			\
+  else								\
+    _flush_cache ((BEG), ((char *)(END) - (char *)(BEG)), 3);	\
+}
+
 /* A C statement to initialize the variable parts of a trampoline.
    ADDR is an RTX for the address of the trampoline; FNADDR is an
    RTX for the address of the nested function; STATIC_CHAIN is an
@@ -2122,15 +2141,7 @@ typedef struct mips_args {
   chain_addr = plus_constant (func_addr, GET_MODE_SIZE (ptr_mode));	    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, func_addr), FUNC);		    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, chain_addr), CHAIN);		    \
-									    \
-  /* Flush both caches.  We need to flush the data cache in case	    \
-     the system has a write-back cache.  */				    \
-  /* ??? Should check the return value for errors.  */			    \
-  if (mips_cache_flush_func && mips_cache_flush_func[0])		    \
-    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),   \
-		       0, VOIDmode, 3, ADDR, Pmode,			    \
-		       GEN_INT (TRAMPOLINE_SIZE), TYPE_MODE (integer_type_node),\
-		       GEN_INT (3), TYPE_MODE (integer_type_node));	    \
+  emit_insn (gen_flush_icache (copy_rtx (ADDR), GEN_INT (TRAMPOLINE_SIZE))); \
 }
 \f
 /* Addressing modes, and classification of registers for them.  */
Index: config/mips/netbsd.h
===================================================================
--- config/mips/netbsd.h	(revision 125997)
+++ config/mips/netbsd.h	(working copy)
@@ -190,6 +190,16 @@ Boston, MA 02110-1301, USA.  */
 #undef CACHE_FLUSH_FUNC
 #define CACHE_FLUSH_FUNC "_cacheflush"
 
+/* Clear the instruction cache from `beg' to `end'.  */
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(BEG, END)				\
+{								\
+  extern void _cacheflush (char *b, int l, int f);		\
+  if (__builtin_clear_cache_inline_p())				\
+    __builtin___clear_cache ((BEG), (END));			\
+  else								\
+    _cacheflush ((BEG), ((char *)(END) - (char *)(BEG)), 3);	\
+}
 
 /* Make gcc agree with <machine/ansi.h> */
 
Index: config/mips/mips-protos.h
===================================================================
--- config/mips/mips-protos.h	(revision 125997)
+++ config/mips/mips-protos.h	(working copy)
@@ -1,6 +1,6 @@
 /* Prototypes of target machine for GNU compiler.  MIPS version.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   1999, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    Contributed by A. Lichnewsky (lich@inria.inria.fr).
    Changed by Michael Meissner	(meissner@osf.org).
    64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
@@ -185,6 +185,7 @@ extern void mips_expand_call (rtx, rtx, 
 extern void mips_emit_fcc_reload (rtx, rtx, rtx);
 extern void mips_set_return_address (rtx, rtx);
 extern bool mips_expand_block_move (rtx, rtx, rtx);
+extern void mips_expand_synci_loop (rtx, rtx);
 
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
Index: config/mips/mips.md
===================================================================
--- config/mips/mips.md	(revision 125997)
+++ config/mips/mips.md	(working copy)
@@ -50,6 +50,10 @@ (define_constants
    (UNSPEC_TLS_GET_TP		28)
    (UNSPEC_MFHC1		31)
    (UNSPEC_MTHC1		32)
+   (UNSPEC_CLEAR_HAZARD		33)
+   (UNSPEC_RDHWR		34)
+   (UNSPEC_SYNCI		35)
+   (UNSPEC_SYNC			36)
 
    (UNSPEC_ADDRESS_FIRST	100)
 
@@ -4171,6 +4175,82 @@ (define_insn "cprestore"
 }
   [(set_attr "type" "store")
    (set_attr "length" "4,12")])
+
+;; Flush the instruction cache starting as operands[0] with length
+;; operands[1].
+(define_expand "flush_icache"
+  [(match_operand:SI 0 "pmode_register_operand")
+   (match_operand:SI 1 "register_operand")]
+  ""
+  "
+{
+  if (ISA_HAS_SYNCI)
+    {
+      rtx end_addr = gen_reg_rtx (Pmode);
+      emit_insn (gen_rtx_SET (VOIDmode, end_addr,
+      			      gen_rtx_PLUS (Pmode, operands[0], operands[1])));
+      emit_insn (gen_clear_cache (operands[0], end_addr));
+    }
+  else
+    /* Flush both caches.  We need to flush the data cache in case
+       the system has a write-back cache.  */
+    if (mips_cache_flush_func && mips_cache_flush_func[0])
+      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
+                         0, VOIDmode, 3, operands[0], Pmode,
+                         operands[1], TYPE_MODE (integer_type_node),
+                         GEN_INT (3), TYPE_MODE (integer_type_node));
+  DONE;
+}")
+
+;; Expand in-line code to clear the instruction cache between operand[0] and
+;; operand[1].
+(define_expand "clear_cache"
+  [(match_operand:SI 0 "general_operand")
+   (match_operand:SI 1 "general_operand")]
+  "ISA_HAS_SYNCI"
+  "
+{
+  mips_expand_synci_loop (operands[0], operands[1]);
+  emit_insn (gen_clear_hazard ());
+  DONE;
+}")
+
+(define_insn "sync"
+  [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
+  ""
+  "sync"
+  [(set_attr "length" "4")])
+
+(define_insn "synci"
+  [(unspec_volatile [(match_operand:SI 0 "general_operand" "d")] UNSPEC_SYNCI)]
+  ""
+  "synci\t0(%0)"
+  [(set_attr "length" "4")])
+
+(define_insn "rdhwr"
+  [(set (match_operand:SI 0 "general_operand" "=d")
+        (unspec_volatile [(match_operand:SI 1 "immediate_operand" "n")]
+        UNSPEC_RDHWR))]
+  ""
+  "rdhwr\t%0,$%1"
+  [(set_attr "length" "4")])
+
+(define_insn "clear_hazard"
+  [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
+   (clobber (reg:SI 31))]
+  "ISA_HAS_SYNCI"
+{
+  return ".set\tpush\n"
+         "\t.set\tnoreorder\n"
+         "\t.set\tnomacro\n"
+         "\tbal\t1f\n"
+         "\tnop\n"
+         "1:\taddiu\t$31,$31,12\n"
+         "\tjr.hb\t$31\n"
+         "\tnop\n"
+         "\t.set\tpop";
+}
+  [(set_attr "length" "20")])
 \f
 ;; Block moves, see mips.c for more details.
 ;; Argument 0 is the destination
Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 125997)
+++ config/mips/mips.c	(working copy)
@@ -3759,6 +3759,47 @@ mips_block_move_loop (rtx dest, rtx src,
     mips_block_move_straight (dest, src, leftover);
 }
 \f
+
+/* Return true if we will expand __builtin_clear_cache() in-line. */
+bool
+mips_builtin_clear_cache_inline_p (void)
+{
+  return ISA_HAS_SYNCI;
+}
+
+/* Expand a loop of synci insns */
+void
+mips_expand_synci_loop (rtx begin, rtx end)
+{
+  rtx end_addr, inc, label, label_ref, cmp, cmp_result;
+
+  begin = force_reg(Pmode, begin);
+  end = force_reg(Pmode, end);
+
+  /* Load INC with the cache line size (rdhwr INC,$1). */
+  inc = gen_reg_rtx (SImode);
+  emit_insn (gen_rdhwr (inc ,gen_rtx_CONST_INT(SImode, 1)));
+
+  /* Loop back to here. */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  emit_insn (gen_synci (begin));
+
+  cmp = gen_reg_rtx (SImode);
+  emit_insn (gen_rtx_SET (VOIDmode, cmp, gen_rtx_GTU (Pmode, begin, end)));
+
+  emit_insn (gen_rtx_SET (VOIDmode, begin, gen_rtx_PLUS (Pmode, begin, inc)));
+
+  label_ref = gen_rtx_LABEL_REF (VOIDmode, label);
+  cmp_result = gen_rtx_EQ (VOIDmode, cmp, gen_rtx_CONST_INT(SImode, 0));
+  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+                               gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_result,
+                                                     label_ref,
+                                                     pc_rtx)));
+  emit_insn (gen_sync ());
+}
+\f
 /* Expand a movmemsi instruction.  */
 
 bool
Index: testsuite/gcc.target/mips/clear-cache-1.c
===================================================================
--- testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
+++ testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32r2" } */
+/* { dg-final { scan-assembler "synci" } } */
+/* { dg-final { scan-assembler "jr.hb" } } */
+/* { dg-final { scan-assembler-not "__clear_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+
Index: testsuite/gcc.target/mips/clear-cache-2.c
===================================================================
--- testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
+++ testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32" } */
+/* { dg-final { scan-assembler-not "synci" } } */
+/* { dg-final { scan-assembler-not "jr.hb" } } */
+/* { dg-final { scan-assembler "__clear_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache.
  2007-07-01  5:11 ` [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache David Daney
@ 2007-07-05  8:34   ` David Daney
  2007-07-05 19:08     ` Richard Sandiford
  0 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-05  8:34 UTC (permalink / raw)
  To: gcc-patches, java-patches

[-- Attachment #1: Type: text/plain, Size: 1016 bytes --]

David Daney wrote:
> This is the third and final part of the __builtin_flush_icache patch.
> It replaces the hard coded library call to cacheflush() in the MIPS
> ffi_prep_closure_loc with a call to __builtin_flush_icache();
>
> For targets that support user-space i-cache flushing (mips32r2), this
> will eliminate a system call from the ffi closure preparation path.
> On all other targets the generated code is the same as before.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu all default
> languages with no regressions.
>
> Also tested on:
> x86_64 cross to mipsel-linux --with-arch=mips32
> i686   cross to mipsel-linux --with-arch=mips32r2
>
> with no regressions.
A new version of the patch.  All that changed is the name of the 
builtin.  It is now __builtin___clear_cache().

Tested as before.

OK to commit:

2007-07-04  David Daney  <ddaney@avtrex.com>

    * src/mips/ffi.c: Don't include sys/cachectl.h.
    (ffi_prep_closure_loc): Use __builtin___clear_cache() instead of
    cacheflush().


[-- Attachment #2: flush-cache3.diff --]
[-- Type: text/x-patch, Size: 974 bytes --]

Index: src/mips/ffi.c
===================================================================
--- src/mips/ffi.c	(revision 125997)
+++ src/mips/ffi.c	(working copy)
@@ -27,7 +27,6 @@
 #include <ffi_common.h>
 
 #include <stdlib.h>
-#include <sys/cachectl.h>
 
 #if _MIPS_SIM == _ABIN32
 #define FIX_ARGP \
@@ -506,6 +505,7 @@ ffi_prep_closure_loc (ffi_closure *closu
   unsigned int *tramp = (unsigned int *) &closure->tramp[0];
   unsigned int fn;
   unsigned int ctx = (unsigned int) codeloc;
+  char *clear_location = (char *) codeloc;
 
 #if defined(FFI_MIPS_O32)
   FFI_ASSERT(cif->abi == FFI_O32 || cif->abi == FFI_O32_SOFT_FLOAT);
@@ -525,8 +525,7 @@ ffi_prep_closure_loc (ffi_closure *closu
   closure->fun = fun;
   closure->user_data = user_data;
 
-  /* XXX this is available on Linux, but anything else? */
-  cacheflush (codeloc, FFI_TRAMPOLINE_SIZE, ICACHE);
+  __builtin___clear_cache(clear_location, clear_location + FFI_TRAMPOLINE_SIZE);
 
   return FFI_OK;
 }

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Patch] 4/3 i386 target support for __builtin___clear_cache()
  2007-07-01  5:01 [Patch] 1/3 Add new builtin __builtin_flush_icache() David Daney
                   ` (2 preceding siblings ...)
  2007-07-01  8:01 ` [Patch] 1/3 Add new builtin __builtin_flush_icache() Paolo Bonzini
@ 2007-07-05  9:05 ` David Daney
  2007-07-08 20:39   ` David Daney
  3 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-05  9:05 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 587 bytes --]

This patch adds support for __builtin___clear_cache() to the i386 (and 
x86_64) target.

There is not much to see here.  All I do is define the 
TARGET_BUILTIN_CLEAR_CACHE_INLINE_P target hook to always return false.  
This causes __builtin___clear_cache() to be a nop for this target.

Tested with bootstrap and make -k check with no regressions on 
x86_64-unknown-linux-gnu.

OK to commit if the target independent portion of the patch is approved?

2007-07-04  David Daney  <ddaney@avtrex.com>

    * config/i386/i386.h (TARGET_BUILTIN_CLEAR_CACHE_INLINE_P): Define
    target hook.


[-- Attachment #2: flush-cache4.diff --]
[-- Type: text/x-patch, Size: 672 bytes --]

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 125997)
+++ config/i386/i386.h	(working copy)
@@ -1716,6 +1716,12 @@ typedef struct ix86_args {
 
 #define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
   x86_initialize_trampoline ((TRAMP), (FNADDR), (CXT))
+
+/* We never need to clear the instruction cache.  Returning true will
+   prevent _builtin__clear_cache from expanding to a library call.
+   Since a "clear_cache" insn is not defined it will expand to
+   const0_rtx.  */
+#define TARGET_BUILTIN_CLEAR_CACHE_INLINE_P hook_bool_void_true
 \f
 /* Definitions for register eliminations.
 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-05  7:36             ` David Daney
@ 2007-07-05 17:59               ` Richard Sandiford
  2007-07-05 18:23                 ` David Daney
  2007-07-06  6:07               ` Mark Mitchell
  1 sibling, 1 reply; 34+ messages in thread
From: Richard Sandiford @ 2007-07-05 17:59 UTC (permalink / raw)
  To: David Daney; +Cc: Mark Mitchell, Paolo Bonzini, gcc-patches

David Daney <ddaney@avtrex.com> writes:
> +/* Expand a call to _builtin___clear_cache.  If the target does not
> +   support the operation, return NULL_RTX so it expands as a call. */
> +static rtx
> +expand_builtin_clear_cache (tree exp)
> +{
> +  tree begin, end;
> +  rtx begin_rtx, end_rtx;
> +
> +  if (!targetm.builtin_clear_cache_inline_p())
> +    return NULL_RTX;
> +
> +  /* We must not expand to a library call if
> +     __builtin_clear_cache_inline_p() returns true.  If we did, the
> +     fallback library function in libgcc would expand to a call to
> +     _builtin___clear_cache() and we would recurse infinitely.  */
> +  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))

This comment seems to belong above the earlier code.  However, I'm only
really replying because...

> +  /* Evaluate BEGIN and END. */
> +  begin_rtx = expand_expr (begin, NULL_RTX, Pmode, EXPAND_NORMAL);
> +  begin_rtx = convert_memory_address (Pmode, begin_rtx);
> +
> +  end_rtx = expand_expr (end, NULL_RTX, Pmode, EXPAND_NORMAL);
> +  end_rtx = convert_memory_address (Pmode, end_rtx);
> +
> +#ifdef HAVE_clear_cache
> +  if (HAVE_clear_cache)
> +    emit_insn (gen_clear_cache (begin_rtx, end_rtx));
> +#endif

When I said that the switch from general_operand to pmode_register_operand
would need target-independent changes, I meant it would need changes to
this bit of code.  I think you need to check the predicates of each
clear_cache operand and force the operand into a register if it doesn't
match.  Something like:

  if (HAVE_clear_cache)
    {
      icode = CODE_FOR_clear_cache;
      if (!insn_data[icode].operand[0].predicate (begin_rtx, Pmode))
	begin_rtx = copy_to_mode_reg (Pmode, begin_rtx);
      if (!insn_data[icode].operand[1].predicate (end_rtx, Pmode))
	end_rtx = copy_to_mode_reg (Pmode, end_rtx);
      emit_insn (gen_clear_cache (begin_rtx, end_rtx));
    }

(It's unfortunate that this construct has to be copied so often,
but that's the way it is.)

As it stands, the code only seems to guarantee that the operand
is an address operand, whereas the predicates in the MIPS patch
claim that it will be a register.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-05 17:59               ` Richard Sandiford
@ 2007-07-05 18:23                 ` David Daney
  2007-07-05 19:11                   ` Richard Sandiford
  0 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-05 18:23 UTC (permalink / raw)
  To: David Daney, Mark Mitchell, Paolo Bonzini, gcc-patches, rsandifo

Richard Sandiford wrote:
> David Daney <ddaney@avtrex.com> writes:
>> +/* Expand a call to _builtin___clear_cache.  If the target does not
>> +   support the operation, return NULL_RTX so it expands as a call. */
>> +static rtx
>> +expand_builtin_clear_cache (tree exp)
>> +{
>> +  tree begin, end;
>> +  rtx begin_rtx, end_rtx;
>> +
>> +  if (!targetm.builtin_clear_cache_inline_p())
>> +    return NULL_RTX;
>> +
>> +  /* We must not expand to a library call if
>> +     __builtin_clear_cache_inline_p() returns true.  If we did, the
>> +     fallback library function in libgcc would expand to a call to
>> +     _builtin___clear_cache() and we would recurse infinitely.  */
>> +  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
> 
> This comment seems to belong above the earlier code.  However, I'm only
> really replying because...
> 

This is the way I meant for it to be.  If 
targetm.builtin_clear_cache_inline_p() is false we just do the default 
expand to call the function in libgcc.

If we are expanding in-line, the it is an error if the arguments are not 
pointers.

The only reason it is an error is to prevent infinite recursion.  If 
that were not an issue, we could return NULL_RTX if validate_arglist failed.


>> +  /* Evaluate BEGIN and END. */
>> +  begin_rtx = expand_expr (begin, NULL_RTX, Pmode, EXPAND_NORMAL);
>> +  begin_rtx = convert_memory_address (Pmode, begin_rtx);
>> +
>> +  end_rtx = expand_expr (end, NULL_RTX, Pmode, EXPAND_NORMAL);
>> +  end_rtx = convert_memory_address (Pmode, end_rtx);
>> +
>> +#ifdef HAVE_clear_cache
>> +  if (HAVE_clear_cache)
>> +    emit_insn (gen_clear_cache (begin_rtx, end_rtx));
>> +#endif
> 
> When I said that the switch from general_operand to pmode_register_operand
> would need target-independent changes, I meant it would need changes to
> this bit of code.  I think you need to check the predicates of each
> clear_cache operand and force the operand into a register if it doesn't
> match.  Something like:
> 
>   if (HAVE_clear_cache)
>     {
>       icode = CODE_FOR_clear_cache;
>       if (!insn_data[icode].operand[0].predicate (begin_rtx, Pmode))
> 	begin_rtx = copy_to_mode_reg (Pmode, begin_rtx);
>       if (!insn_data[icode].operand[1].predicate (end_rtx, Pmode))
> 	end_rtx = copy_to_mode_reg (Pmode, end_rtx);
>       emit_insn (gen_clear_cache (begin_rtx, end_rtx));
>     }
> 
> (It's unfortunate that this construct has to be copied so often,
> but that's the way it is.)
> 
> As it stands, the code only seems to guarantee that the operand
> is an address operand, whereas the predicates in the MIPS patch
> claim that it will be a register.

The operands in the MIPS patch are both (match_operand:SI 0 
"general_operand").  Look at the part in mips.c where I do:

begin = force_reg(Pmode, begin);

If that is the wrong way to put something in a register I will change it.

David Daney

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-05  7:50     ` David Daney
@ 2007-07-05 19:05       ` Richard Sandiford
  2007-07-08 20:00         ` David Daney
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Sandiford @ 2007-07-05 19:05 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches

Thanks for all the updates.

David Daney <ddaney@avtrex.com> writes:
> Richard Sandiford wrote:
>> David Daney <ddaney@avtrex.com> writes:
>>> +    /* Flush both caches.  We need to flush the data cache in case
>>> +       the system has a write-back cache.  */
>>> +    /* ??? Should check the return value for errors.  */
>>> +    if (mips_cache_flush_func && mips_cache_flush_func[0])
>>> +      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
>>> +                         0, VOIDmode, 3, copy_rtx (operands[0]), Pmode,
>>> +                         copy_rtx (operands[1]), TYPE_MODE (integer_type_node),
>>> +                         GEN_INT (3), TYPE_MODE (integer_type_node));
>>> +  DONE;
>>>     
>>
>> Same copy_rtx comment here.  TBH, I'm not sure what the "???" comment
>> refers to.
>>
>>   
> I just pased this part in from its old home in mips.h.  The ??? comment 
> came with it.  It is gone now.

Sorry, I missed that.  And thanks for deleting it.

>> I wonder if would be better
>> to simply expand this as rtl, with special patterns for the "rdhwr",
>> "synci" and "sync" instructions.  This is similar to what we do for
>> other largish patterns.  That's certainly not a requirement though;
>> I'm not even sure it makes sense.  It's just an idea-cum-question.
>>
> That is what I did.

Looks good, thanks.

>> Following on from your comment about delay slots, it also strikes me
>> that the "bal; nop; addiu" sequence is almost always longer than the
>> sequence to load a local address into a register.  I think the only
>> exception is static n64 with -mno-sym32.  I wonder if it would be
>> worth using a sequence like:
>>
>>     rtx label, target, insn;
>>
>>     label = gen_label_rtx ();
>>     target = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, label));
>>     insn = emit_jump_insn (gen_indirect_jump_hb (target));
>>     REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (insn));
>>     emit_label (label);
>>
>> where indirect_jump_hb would just be something like:
>>
>> (define_insn "indirect_jump_hb"
>>   [(set (pc) (match_operand "pmode_register_operand" "d"))
>>    (unspec_volatile [(const_int 0)] UNSPEC_JR_HB)]
>>   "ISA_HAS_SYNCI"
>>   "jr.hb\t%0"
>>   [(set_attr "type" "jump")])
>>
>> (all untested).  I think we might then be able to fill the delay slot
>> from the target of the jump, which should be OK in this context.
>> Again, this is just an idea-cum-question.
>>
>>   
> I spent way too much time trying to get that to work.  For now I left it 
> mostly unchanged from the original patch.
>
> The problem is that the CFG things just don't seem to be able to handle:
> 1) Jump insns where the body is a (parallel ...) instead of a (set (pc) 
> ...).
> 2) Unconditional jumps where the target and fall through edges goto the 
> same place.

Thanks for trying.  In that case, I agree your patch is the way to go.

> I kept the call to mips_cache_flush_func pretty much unchanged because I 
> didn't want to break compatibility with the command line options that 
> allow this to be changed.   Also I added CLEAR_INSN_CACHE that expands 
> in libgcc that is used if  __builtin___clear_cache() cannot be expanded 
> in-line.  This is slightly inefficient as it adds an extra function call 
> layer through libgcc and has to do the arithmetic to convert the 
> function parameters, but I am thinking I shouldn't be concerned about a 
> few extra cycles doing this when we are about to do a system call.  The 
> alternative is to have __builtin___clear_cache() directly expand to 
> mips_cache_flush_func and leave libgcc out of the picture for MIPS. 

I'm happy either way.  I suppose the latter is appealing in some ways --
to mirror what you said above, it means that -mflush-func will be
honoured -- but I can see why the consistency of calling a libgcc
function is good too.

Perhaps if we aren't sure, the libgcc-less version is better.
Once we add the function, we'll have to keep it for ABI compatibility.

> Index: config/mips/mips.h
> ===================================================================
> --- config/mips/mips.h	(revision 125997)
> +++ config/mips/mips.h	(working copy)
> @@ -136,6 +136,10 @@ extern const struct mips_cpu_info mips_c
>  extern const struct mips_cpu_info *mips_arch_info;
>  extern const struct mips_cpu_info *mips_tune_info;
>  extern const struct mips_rtx_cost_data *mips_cost;
> +
> +extern bool mips_builtin_clear_cache_inline_p (void);
> +#undef TARGET_BUILTIN_CLEAR_CACHE_INLINE_P
> +#define TARGET_BUILTIN_CLEAR_CACHE_INLINE_P mips_builtin_clear_cache_inline_p

I think the last two lines belong in mips.c, and that
mips_builtin_clear_cache_inline_p can be static.  See how
similar hooks are handled.

(There's a bit of confusing difference in the way target macros
are handled.  Some are intended to be defined by target .h files,
and target-def.h only defines them if the target .h files don't.
Others are defined by target-def.h and then overridden in the
target .c file.  My understanding is that the former are intended
for things like OS-specific and assembler-specific macros,
whereas the latter are intended for _target_-specific macros.
TARGET_BUILTIN_CLEAR_CACHE_INLINE_P is one of the latter for MIPS.)

> +;; Flush the instruction cache starting as operands[0] with length
> +;; operands[1].
> +(define_expand "flush_icache"
> +  [(match_operand:SI 0 "pmode_register_operand")
> +   (match_operand:SI 1 "register_operand")]

I think this should be:

  [(match_operand 0 "pmode_register_operand")
   (match_operand:SI 1 "const_arith_operand")]

There should be no mode on a pmode_register_operand; the predicate
exists to check the run-time Pmode.  And I think the only user --
INITIALIZE_TRAMPOLINE -- does pass a const_int here.  The use of
const_arith_operand (as opposed to const_int_operand) emphasises
that the constant must be a valid operand to an addition isntruction.

> +  if (ISA_HAS_SYNCI)
> +    {
> +      rtx end_addr = gen_reg_rtx (Pmode);
> +      emit_insn (gen_rtx_SET (VOIDmode, end_addr,
> +      			      gen_rtx_PLUS (Pmode, operands[0], operands[1])));

emit_insn (gen_add3_insn (end_addr, operands[0], operands[1]));

> +;; Expand in-line code to clear the instruction cache between operand[0] and
> +;; operand[1].
> +(define_expand "clear_cache"
> +  [(match_operand:SI 0 "general_operand")
> +   (match_operand:SI 1 "general_operand")]

I still think these should be:

  [(match_operand 0 "pmode_register_operand")
   (match_operand 1 "pmode_register_operand")]

> +(define_insn "sync"
> +  [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
> +  ""
> +  "sync"
> +  [(set_attr "length" "4")])

Best make this conditional on ISA_HAS_SYNC, just to be on the safe side.
Very minor nit, but please drop the length attribute, since it's just
restating the default.  Likewise for the others.

> +(define_insn "synci"
> +  [(unspec_volatile [(match_operand:SI 0 "general_operand" "d")] UNSPEC_SYNCI)]
> +  ""
> +  "synci\t0(%0)"
> +  [(set_attr "length" "4")])

ISA_HAS_SYNC again.  Please also make the predicate "pmode_register_operand"
(without a mode).

> +(define_insn "rdhwr"
> +  [(set (match_operand:SI 0 "general_operand" "=d")
> +        (unspec_volatile [(match_operand:SI 1 "immediate_operand" "n")]
> +        UNSPEC_RDHWR))]
> +  ""
> +  "rdhwr\t%0,$%1"
> +  [(set_attr "length" "4")])

Getting even more minor here, but const_int_operand is a little
more specific than immediate_operand, and matches "n" better.

> Index: config/mips/mips.c
> ===================================================================
> --- config/mips/mips.c	(revision 125997)
> +++ config/mips/mips.c	(working copy)
> @@ -3759,6 +3759,47 @@ mips_block_move_loop (rtx dest, rtx src,
>      mips_block_move_straight (dest, src, leftover);
>  }
>  \f
> +
> +/* Return true if we will expand __builtin_clear_cache() in-line. */
> +bool

Silly formatting stuff: two spaces after ".".  Usual (and local) style
is to have a blank line between comment and function.

I tend to prefer comments like:

   Implement TARGET_BUILTIN_CLEAR_CACHE_INLINE_P.

instead, so that you know instantly which tm.texi macro you need to
look at in order to get the full interface.

> +/* Expand a loop of synci insns */
> +void
> +mips_expand_synci_loop (rtx begin, rtx end)

Blank line, and ".  */".  Pedantically, you should mention every
argument in the comment, so how about:

/* Expand a loop of synci insns for the address range [BEGIN, END).  */

> +{
> +  rtx end_addr, inc, label, label_ref, cmp, cmp_result;
> +
> +  begin = force_reg(Pmode, begin);
> +  end = force_reg(Pmode, end);

This wouldn't be needed after the pmode_register_operand change,
although I agree it is needed as things stand.

> +  /* Load INC with the cache line size (rdhwr INC,$1). */
> +  inc = gen_reg_rtx (SImode);
> +  emit_insn (gen_rdhwr (inc ,gen_rtx_CONST_INT(SImode, 1)));

CONST_INT objects don't have modes, and formatting.  Should be:

  emit_insn (gen_rdhwr (inc, const1_rtx));

> +  /* Loop back to here. */
> +  label = gen_label_rtx ();
> +  emit_label (label);
> +
> +  emit_insn (gen_synci (begin));
> +
> +  cmp = gen_reg_rtx (SImode);

Pmode rather than SImode.

> +  emit_insn (gen_rtx_SET (VOIDmode, cmp, gen_rtx_GTU (Pmode, begin, end)));

mips_emit_binary (GTU, cmp, begin, end);

> +
> +  emit_insn (gen_rtx_SET (VOIDmode, begin, gen_rtx_PLUS (Pmode, begin, inc)));

Either:

    emit_insn (gen_add3_insn (begin, begin, inc));

or:

    mips_emit_binary (PLUS, begin, begin, inc);

Probably the latter, givne the GTU thing above.

> +
> +  label_ref = gen_rtx_LABEL_REF (VOIDmode, label);
> +  cmp_result = gen_rtx_EQ (VOIDmode, cmp, gen_rtx_CONST_INT(SImode, 0));
> +  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
> +                               gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_result,
> +                                                     label_ref,
> +                                                     pc_rtx)));

cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
emit_jump_insn (gen_condjump (cmp_result, label));

All in all, very minor comments.  Thanks again for your work on this.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache.
  2007-07-05  8:34   ` David Daney
@ 2007-07-05 19:08     ` Richard Sandiford
  2007-07-11 18:55       ` Tom Tromey
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Sandiford @ 2007-07-05 19:08 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches, java-patches

David Daney <ddaney@avtrex.com> writes:
> A new version of the patch.  All that changed is the name of the 
> builtin.  It is now __builtin___clear_cache().

Assuming this is considered part of the MIPS target...

>     * src/mips/ffi.c: Don't include sys/cachectl.h.
>     (ffi_prep_closure_loc): Use __builtin___clear_cache() instead of
>     cacheflush().

OK, thanks.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-05 18:23                 ` David Daney
@ 2007-07-05 19:11                   ` Richard Sandiford
  0 siblings, 0 replies; 34+ messages in thread
From: Richard Sandiford @ 2007-07-05 19:11 UTC (permalink / raw)
  To: David Daney; +Cc: Mark Mitchell, Paolo Bonzini, gcc-patches

David Daney <ddaney@avtrex.com> writes:
> Richard Sandiford wrote:
>> David Daney <ddaney@avtrex.com> writes:
>>> +  /* Evaluate BEGIN and END. */
>>> +  begin_rtx = expand_expr (begin, NULL_RTX, Pmode, EXPAND_NORMAL);
>>> +  begin_rtx = convert_memory_address (Pmode, begin_rtx);
>>> +
>>> +  end_rtx = expand_expr (end, NULL_RTX, Pmode, EXPAND_NORMAL);
>>> +  end_rtx = convert_memory_address (Pmode, end_rtx);
>>> +
>>> +#ifdef HAVE_clear_cache
>>> +  if (HAVE_clear_cache)
>>> +    emit_insn (gen_clear_cache (begin_rtx, end_rtx));
>>> +#endif
>> 
>> When I said that the switch from general_operand to pmode_register_operand
>> would need target-independent changes, I meant it would need changes to
>> this bit of code.  I think you need to check the predicates of each
>> clear_cache operand and force the operand into a register if it doesn't
>> match.  Something like:
>> 
>>   if (HAVE_clear_cache)
>>     {
>>       icode = CODE_FOR_clear_cache;
>>       if (!insn_data[icode].operand[0].predicate (begin_rtx, Pmode))
>> 	begin_rtx = copy_to_mode_reg (Pmode, begin_rtx);
>>       if (!insn_data[icode].operand[1].predicate (end_rtx, Pmode))
>> 	end_rtx = copy_to_mode_reg (Pmode, end_rtx);
>>       emit_insn (gen_clear_cache (begin_rtx, end_rtx));
>>     }
>> 
>> (It's unfortunate that this construct has to be copied so often,
>> but that's the way it is.)
>> 
>> As it stands, the code only seems to guarantee that the operand
>> is an address operand, whereas the predicates in the MIPS patch
>> claim that it will be a register.
>
> The operands in the MIPS patch are both (match_operand:SI 0 
> "general_operand").  Look at the part in mips.c where I do:
>
> begin = force_reg(Pmode, begin);
>
> If that is the wrong way to put something in a register I will change it.

That's the right way to put something into a register if it isn't
already one.  But I'd like the MIPS pattern to say that it needs
pmode_register_operands from the outset, and for the target-independent
code to force the operands into a register where necessary.  I admit
that not all named patterns work this way -- extv, insv and extzv are
ones that have bitten me in the past -- but the vast majority do.  And I
think that, where possible, we should try to make sure that new patterns
work this way too.

For the vast majority of named patterns, the predicates on the expander
really do meaning something: the code calling the generator function
must make sure that the operands match the predicates first.  With the
patch as it stands, nothing checks the general_operand predicate in the
MIPS clear_cache insn; it's just syntactic lip-service.  The problem is
that it looks (at least to me) like it's more than syntactic lip-service.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-05  7:36             ` David Daney
  2007-07-05 17:59               ` Richard Sandiford
@ 2007-07-06  6:07               ` Mark Mitchell
  2007-07-06  6:19                 ` David Daney
  1 sibling, 1 reply; 34+ messages in thread
From: Mark Mitchell @ 2007-07-06  6:07 UTC (permalink / raw)
  To: David Daney; +Cc: Paolo Bonzini, gcc-patches

David Daney wrote:

> Ok, I think I have come around to your way of thinking.  I still have
> the new predicate __builtin_clear_cache_inline_p() for use in target
> CLEAR_INSN_CACHE definitions, but I leave __clear_cache unchanged.

I don't understand the need for the inline_p function.  It seems to me
there are two scenarios:

1. The back-end has an inline definition of __builtin___clear_cache.  In
that case, it does:

  #define CLEAR_INSN_CACHE(BEG, END) __builtin___clear_cache(BEG, END);

2. The back-end does not have an inline definition of
__builtin_clear_cache.  In that case, it does:

  #define CLEAR_INSN_CACHE(BEG, END) \
    /* Something not involving __builtin___clear_cache */

In both cases, the libgcc __clear_cache routine just does:

  CLEAR_INSN_CACHE (beg, end);

And, in both cases, the user can write either __clear_cache (always an
out-of-line call), or __builtin_clear_cache (may be an out-of-line call,
or may be inline code).

Is your concern that in case (1) the back-end has to do two things:
define the builtin and define CLEAR_INSN_CACHE?  That's a little lame,
but it doesn't seem worse than having to define two builtins.

Am I missing some intermediate case?

Thanks,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-06  6:07               ` Mark Mitchell
@ 2007-07-06  6:19                 ` David Daney
  2007-07-06 16:39                   ` Mark Mitchell
  0 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-06  6:19 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Paolo Bonzini, gcc-patches

Mark Mitchell wrote:
> David Daney wrote:
>
>   
>> Ok, I think I have come around to your way of thinking.  I still have
>> the new predicate __builtin_clear_cache_inline_p() for use in target
>> CLEAR_INSN_CACHE definitions, but I leave __clear_cache unchanged.
>>     
>
> I don't understand the need for the inline_p function.  It seems to me
> there are two scenarios:
>
> 1. The back-end has an inline definition of __builtin___clear_cache.  In
> that case, it does:
>
>   #define CLEAR_INSN_CACHE(BEG, END) __builtin___clear_cache(BEG, END);
>
> 2. The back-end does not have an inline definition of
> __builtin_clear_cache.  In that case, it does:
>
>   #define CLEAR_INSN_CACHE(BEG, END) \
>     /* Something not involving __builtin___clear_cache */
>
> In both cases, the libgcc __clear_cache routine just does:
>
>   CLEAR_INSN_CACHE (beg, end);
>
> And, in both cases, the user can write either __clear_cache (always an
> out-of-line call), or __builtin_clear_cache (may be an out-of-line call,
> or may be inline code).
>
> Is your concern that in case (1) the back-end has to do two things:
> define the builtin and define CLEAR_INSN_CACHE?  That's a little lame,
> but it doesn't seem worse than having to define two builtins.
>
> Am I missing some intermediate case?
>   

The MIPS family has some members that can clear the instruction cache 
with in-line instructions from user space.  Other members of the family 
can only clear the instruction cache from kernel space and must make a 
system call (perhaps via __clear_cache() in libgcc).  There are command 
line switches that select the ISA, and thus for which of these cases we 
are generating code.

When Paolo Bonzini wrote:
 > try to see how __clear_cache could use the definition of the builtin.

I realized that if one were not careful, infinite recursion might result.

The idea behind __builtin_clear_cache_inline_p() is that it allows us to 
test in source code which of these two cases are in effect at compile 
time.  At the time we write a CLEAR_INSN_CACHE macro, we don't know 
which ISA will be targeted when libgcc is built.

When __builtin___clear_cache() expands to a library call to 
__clear_cache() in libgcc, we cannot have __clear_cache() be implemented 
by __builtin___clear_cache() or we would recurse infinitely.  I envision 
that __builtin_clear_cache_inline_p() would be tested in like this:

#define CLEAR_INSN_CACHE(BEG, END)				\
{								\
  extern void _flush_cache (char *b, int l, int f);		\
  if (__builtin_clear_cache_inline_p())				\
    __builtin___clear_cache ((BEG), (END));			\
  else								\
    _flush_cache ((BEG), ((char *)(END) - (char *)(BEG)), 3);	\
}

The same thing could probably be achieved by setting and examining a set of preprocessor symbols, but I think having a single well defined predicate is a cleaner solution.

That said, after corresponding with Richard Sandiford about the MIPS portion of the patch, I think that I will expand the calls to _flush_cache() directly in the back-end rather than call to __clear_cache() in libgcc which would then do the system call.  So for MIPS __builtin_clear_cache_inline_p() would be unconditionally true.  i386 and x86_64 don't need to clear their instruction caches, so for these targets  __builtin_clear_cache_inline_p() would likewise be true unconditionally.

IIRC there are only two targets that currently define CLEAR_INSN_CACHE, they are arm and m86k.  I don't know if __builtin_clear_cache_inline_p() would be useful for them.

I hope that explains what I was thinking.

Not knowing the intimate details of the majority of GCC targets, I do not know if  
__builtin_clear_cache_inline_p() would ever used for anything.  Hypothetically if arm were to add instructions that flushed the cache without making a system call, something like __builtin_clear_cache_inline_p() would be useful in writing the CLEAR_INSN_CACHE macro.

If it is deemed to be useless, I will remove __builtin_clear_cache_inline_p() from the patch as it is unneeded for my current target of interest (MIPS).

David Daney

 


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-06  6:19                 ` David Daney
@ 2007-07-06 16:39                   ` Mark Mitchell
  2007-07-08 19:22                     ` David Daney
  0 siblings, 1 reply; 34+ messages in thread
From: Mark Mitchell @ 2007-07-06 16:39 UTC (permalink / raw)
  To: David Daney; +Cc: Paolo Bonzini, gcc-patches

David Daney wrote:

> If it is deemed to be useless, I will remove
> __builtin_clear_cache_inline_p() from the patch as it is unneeded for my
> current target of interest (MIPS).

Since you don't need it yet, I would suggest leaving it out for now.
The ARM and 68K ports are using OS syscalls to clear the cache, which is
ISA-independent, so they don't need the inline_p test either.

I understand what you were trying to accomplish, and we may well end up
needing it, but let's not do it until we do.  If we do need it, we may
want it as a predefined macro, not as a builtin, so that it can be used
in preprocessor tests.  (I believe we've done that with other builtins.)

Thanks,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-06 16:39                   ` Mark Mitchell
@ 2007-07-08 19:22                     ` David Daney
  2007-07-09 19:04                       ` Richard Sandiford
  2007-07-11  2:22                       ` Mark Mitchell
  0 siblings, 2 replies; 34+ messages in thread
From: David Daney @ 2007-07-08 19:22 UTC (permalink / raw)
  To: gcc-patches; +Cc: Mark Mitchell, Paolo Bonzini, rsandifo

[-- Attachment #1: Type: text/plain, Size: 2777 bytes --]

Mark Mitchell wrote:
> David Daney wrote:
>
>   
>> If it is deemed to be useless, I will remove
>> __builtin_clear_cache_inline_p() from the patch as it is unneeded for my
>> current target of interest (MIPS).
>>     
>
> Since you don't need it yet, I would suggest leaving it out for now.
> The ARM and 68K ports are using OS syscalls to clear the cache, which is
> ISA-independent, so they don't need the inline_p test either.
>
> I understand what you were trying to accomplish, and we may well end up
> needing it, but let's not do it until we do.  If we do need it, we may
> want it as a predefined macro, not as a builtin, so that it can be used
> in preprocessor tests.  (I believe we've done that with other builtins.)
>   
Here is a new version of the patch.  Taking Mark's suggestion to remove 
__builtin_clear_cache_inline_p() simplifies things somewhat.

My new implementation has three cases for __builtin___clear_cache():

1) No "clear_cache" insn is supplied but the target does supply a 
definition for CLEAR_INSN_CACHE.  In this case (arm, m68k IIRC) the new 
builtin will expand to a call to the existing __clear_cache in libgcc.

2) A "clear_cache" insn is supplied.  For this case, the builtin expands 
to said insn.  The mips portion of the patch currently has the only 
implemtation of "clear_cache".

3) Neither "clear_cache" or CLEAR_INSN_CACHE is defined.  This case 
covers most GCC targets including but not limited to i386 and x86_64.  
In this case the builtin expands to nothing, so it is a nop.

As additional targets add support for __builtin___clear_cache(), there 
are several options:

* Do everything in libgcc via a definition of CLEAR_INSN_CACHE.  If this 
is done, backwards compatibility will have to be maintained in all 
future versions of libgcc.

* Do everything in a clear_cache insn.  In this case there are no 
backwards compatibility issues in libgcc.  This is what we chose to do 
in the MIPS portion of the patch.

* Some sort of hybrid of the other two options.  Care should be taken to 
avoid situations where calls to either the builtin or the library 
function would recursively call each other.

Bootstrapped and regression tested  on: x86_64-unknown-linux-gnu all 
default languages.
i686-unknown-linux-gnu to mipsel-linux (--with-arch=mips32 and 
--with-arch-mips32r2) cross tested with no regressions.

OK to commit?

:ADDPATCH middle-end:

2007-07-08  David Daney  <ddaney@avtrex.com>

    * builtins.def (BUILT_IN_CLEAR_CACHE): New builtin.
    * builtins.c (expand_builtin___clear_cache): New function.
    (expand_builtin): Call expand_builtin___clear_cache for
    BUILT_IN_CLEAR_CACHE case.
    * doc/extend.texi (__builtin___clear_cache): Document new builtin.
    * testsuite/gcc.dg/builtins-64.c: New test.


[-- Attachment #2: flush-cache1.diff --]
[-- Type: text/x-patch, Size: 5272 bytes --]

Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def	(revision 125997)
+++ gcc/builtins.def	(working copy)
@@ -1,6 +1,6 @@
 /* This file contains the definitions and documentation for the
    builtins used in the GNU compiler.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -609,6 +609,7 @@ DEF_GCC_BUILTIN        (BUILT_IN_APPLY_A
 DEF_GCC_BUILTIN        (BUILT_IN_ARGS_INFO, "args_info", BT_FN_INT_INT, ATTR_NULL)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LIST)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_CLASSIFY_TYPE, "classify_type", BT_FN_INT_VAR, ATTR_NULL)
 DEF_GCC_BUILTIN        (BUILT_IN_CLZ, "clz", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LIST)
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 125997)
+++ gcc/builtins.c	(working copy)
@@ -5513,6 +5513,58 @@ expand_builtin_profile_func (bool exitp)
   return const0_rtx;
 }
 
+/* Expand a call to __builtin___clear_cache.  */
+
+static rtx
+expand_builtin___clear_cache (tree exp ATTRIBUTE_UNUSED)
+{
+#ifndef HAVE_clear_cache
+#ifdef CLEAR_INSN_CACHE
+  /* There is no "clear_cache" insn, and __clear_cache() in libgcc
+     does something.  Just do the default expansion to a call to __clear_cache().  */
+  return NULL_RTX;
+#else
+  /* There is no "clear_cache" insn, and __clear_cache() in libgcc
+     does nothing.  There is no need to call it.  Do nothing.  */
+  return const0_rtx;
+#endif /* CLEAR_INSN_CACHE */
+#else
+  /*  We have a "clear_cache" insn, and it will handle everything.  */
+  tree begin, end;
+  rtx begin_rtx, end_rtx;
+  enum insn_code icode;
+
+  /* We must not expand to a library call.  If we did, any
+     fallback library function in libgcc that might contain a call to
+     __builtin___clear_cache() would recurse infinitely.  */
+  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
+    {
+      error ("both arguments to %<__builtin___clear_cache%> must be pointers");
+      return const0_rtx;
+    }
+
+  if (HAVE_clear_cache)
+    {
+      icode = CODE_FOR_clear_cache;
+
+      begin = CALL_EXPR_ARG (exp, 0);
+      begin_rtx = expand_expr (begin, NULL_RTX, Pmode, EXPAND_NORMAL);
+      begin_rtx = convert_memory_address (Pmode, begin_rtx);
+      if (!insn_data[icode].operand[0].predicate (begin_rtx, Pmode))
+	begin_rtx = copy_to_mode_reg (Pmode, begin_rtx);
+
+      end = CALL_EXPR_ARG (exp, 1);
+      end_rtx = expand_expr (end, NULL_RTX, Pmode, EXPAND_NORMAL);
+      end_rtx = convert_memory_address (Pmode, end_rtx);
+      if (!insn_data[icode].operand[1].predicate (end_rtx, Pmode))
+	end_rtx = copy_to_mode_reg (Pmode, end_rtx);
+
+      emit_insn (gen_clear_cache (begin_rtx, end_rtx));
+    }
+  return const0_rtx;
+#endif /* HAVE_clear_cache */
+}
+
 /* Given a trampoline address, make sure it satisfies TRAMPOLINE_ALIGNMENT.  */
 
 static rtx
@@ -6181,6 +6233,12 @@ expand_builtin (tree exp, rtx target, rt
 	return const0_rtx;
       return expand_builtin_next_arg ();
 
+    case BUILT_IN_CLEAR_CACHE:
+      target = expand_builtin___clear_cache (exp);
+      if (target)
+        return target;
+      break;
+
     case BUILT_IN_CLASSIFY_TYPE:
       return expand_builtin_classify_type (exp);
 
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 125997)
+++ gcc/doc/extend.texi	(working copy)
@@ -6086,6 +6086,18 @@ if (__builtin_expect (ptr != NULL, 1))
 when testing pointer or floating-point values.
 @end deftypefn
 
+@deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
+This functions is used to flush the processor's instruction cache for
+the region of memory between @var{begin} and @var{end}.  Some
+targets require that the instruction cache be flushed, after modifying
+memory containing code, in order to obtain deterministic behavior.
+
+If the target does not require instruction cache flushes,
+@code{__builtin___clear_cache} has no effect.  Otherwise either
+instructions are emitted in-line to clear the instruction cache or a
+call to the @code{__clear_cache} function in libgcc is made.
+@end deftypefn
+
 @deftypefn {Built-in Function} void __builtin_prefetch (const void *@var{addr}, ...)
 This function is used to minimize cache-miss latency by moving data into
 a cache before it is accessed.
Index: gcc/testsuite/gcc.dg/builtins-64.c
===================================================================
--- gcc/testsuite/gcc.dg/builtins-64.c	(revision 0)
+++ gcc/testsuite/gcc.dg/builtins-64.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+
+int main ()
+{
+  char *mem = __builtin_alloca (40);
+  __builtin___clear_cache (mem, mem + 40);
+  return 0;
+}

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-05 19:05       ` Richard Sandiford
@ 2007-07-08 20:00         ` David Daney
  2007-07-09 19:07           ` Richard Sandiford
  0 siblings, 1 reply; 34+ messages in thread
From: David Daney @ 2007-07-08 20:00 UTC (permalink / raw)
  To: gcc-patches, rsandifo

[-- Attachment #1: Type: text/plain, Size: 8670 bytes --]

Richard Sandiford wrote:
> Thanks for all the updates.
>
> David Daney <ddaney@avtrex.com> writes:
>   
>> I kept the call to mips_cache_flush_func pretty much unchanged because I 
>> didn't want to break compatibility with the command line options that 
>> allow this to be changed.   Also I added CLEAR_INSN_CACHE that expands 
>> in libgcc that is used if  __builtin___clear_cache() cannot be expanded 
>> in-line.  This is slightly inefficient as it adds an extra function call 
>> layer through libgcc and has to do the arithmetic to convert the 
>> function parameters, but I am thinking I shouldn't be concerned about a 
>> few extra cycles doing this when we are about to do a system call.  The 
>> alternative is to have __builtin___clear_cache() directly expand to 
>> mips_cache_flush_func and leave libgcc out of the picture for MIPS. 
>>     
>
> I'm happy either way.  I suppose the latter is appealing in some ways --
> to mirror what you said above, it means that -mflush-func will be
> honoured -- but I can see why the consistency of calling a libgcc
> function is good too.
>
> Perhaps if we aren't sure, the libgcc-less version is better.
> Once we add the function, we'll have to keep it for ABI compatibility.
>   
Right.  That is what I did in this new version of the patch.
>   
>> Index: config/mips/mips.h
>> ===================================================================
>> --- config/mips/mips.h	(revision 125997)
>> +++ config/mips/mips.h	(working copy)
>> @@ -136,6 +136,10 @@ extern const struct mips_cpu_info mips_c
>>  extern const struct mips_cpu_info *mips_arch_info;
>>  extern const struct mips_cpu_info *mips_tune_info;
>>  extern const struct mips_rtx_cost_data *mips_cost;
>> +
>> +extern bool mips_builtin_clear_cache_inline_p (void);
>> +#undef TARGET_BUILTIN_CLEAR_CACHE_INLINE_P
>> +#define TARGET_BUILTIN_CLEAR_CACHE_INLINE_P mips_builtin_clear_cache_inline_p
>>     
>
> I think the last two lines belong in mips.c, and that
> mips_builtin_clear_cache_inline_p can be static.  See how
> similar hooks are handled.
>   
OK, this is moot now.  I removed the target hook from the architecture 
independent part of the patch.

>   
>> +;; Flush the instruction cache starting as operands[0] with length
>> +;; operands[1].
>> +(define_expand "flush_icache"
>> +  [(match_operand:SI 0 "pmode_register_operand")
>> +   (match_operand:SI 1 "register_operand")]
>>     
>
> I think this should be:
>
>   [(match_operand 0 "pmode_register_operand")
>    (match_operand:SI 1 "const_arith_operand")]
>   
This insn is gone now.

>> +;; Expand in-line code to clear the instruction cache between operand[0] and
>> +;; operand[1].
>> +(define_expand "clear_cache"
>> +  [(match_operand:SI 0 "general_operand")
>> +   (match_operand:SI 1 "general_operand")]
>>     
>
> I still think these should be:
>
>   [(match_operand 0 "pmode_register_operand")
>    (match_operand 1 "pmode_register_operand")]
>   
Fixed.

>> +(define_insn "sync"
>> +  [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
>> +  ""
>> +  "sync"
>> +  [(set_attr "length" "4")])
>>     
>
> Best make this conditional on ISA_HAS_SYNC, just to be on the safe side.
> Very minor nit, but please drop the length attribute, since it's just
> restating the default.  Likewise for the others.
>
>   
Fixed.
>> +(define_insn "synci"
>> +  [(unspec_volatile [(match_operand:SI 0 "general_operand" "d")] UNSPEC_SYNCI)]
>> +  ""
>> +  "synci\t0(%0)"
>> +  [(set_attr "length" "4")])
>>     
>
> ISA_HAS_SYNC again.  Please also make the predicate "pmode_register_operand"
> (without a mode).
>
>   
Fixed.
>> +(define_insn "rdhwr"
>> +  [(set (match_operand:SI 0 "general_operand" "=d")
>> +        (unspec_volatile [(match_operand:SI 1 "immediate_operand" "n")]
>> +        UNSPEC_RDHWR))]
>> +  ""
>> +  "rdhwr\t%0,$%1"
>> +  [(set_attr "length" "4")])
>>     
>
> Getting even more minor here, but const_int_operand is a little
> more specific than immediate_operand, and matches "n" better.
>
>   
Fixed.
>> Index: config/mips/mips.c
>> ===================================================================
>> --- config/mips/mips.c	(revision 125997)
>> +++ config/mips/mips.c	(working copy)
>> @@ -3759,6 +3759,47 @@ mips_block_move_loop (rtx dest, rtx src,
>>      mips_block_move_straight (dest, src, leftover);
>>  }
>>  \f
>> +
>> +/* Return true if we will expand __builtin_clear_cache() in-line. */
>> +bool
>>     
>
> Silly formatting stuff: two spaces after ".".  Usual (and local) style
> is to have a blank line between comment and function.
>
> I tend to prefer comments like:
>
>    Implement TARGET_BUILTIN_CLEAR_CACHE_INLINE_P.
>
> instead, so that you know instantly which tm.texi macro you need to
> look at in order to get the full interface.
>
>   
Agreed, but moot.  This part was removed.
>> +/* Expand a loop of synci insns */
>> +void
>> +mips_expand_synci_loop (rtx begin, rtx end)
>>     
>
> Blank line, and ".  */".  Pedantically, you should mention every
> argument in the comment, so how about:
>
> /* Expand a loop of synci insns for the address range [BEGIN, END).  */
>
>   
Fixed.  That is what I used.

>> +{
>> +  rtx end_addr, inc, label, label_ref, cmp, cmp_result;
>> +
>> +  begin = force_reg(Pmode, begin);
>> +  end = force_reg(Pmode, end);
>>     
>
> This wouldn't be needed after the pmode_register_operand change,
> although I agree it is needed as things stand.
>
>   
Removed.
>> +  /* Load INC with the cache line size (rdhwr INC,$1). */
>> +  inc = gen_reg_rtx (SImode);
>> +  emit_insn (gen_rdhwr (inc ,gen_rtx_CONST_INT(SImode, 1)));
>>     
>
> CONST_INT objects don't have modes, and formatting.  Should be:
>
>   emit_insn (gen_rdhwr (inc, const1_rtx));
>
>   
Fixed.
>> +  /* Loop back to here. */
>> +  label = gen_label_rtx ();
>> +  emit_label (label);
>> +
>> +  emit_insn (gen_synci (begin));
>> +
>> +  cmp = gen_reg_rtx (SImode);
>>     
>
> Pmode rather than SImode.
>
>   
Fixed.
>> +  emit_insn (gen_rtx_SET (VOIDmode, cmp, gen_rtx_GTU (Pmode, begin, end)));
>>     
>
> mips_emit_binary (GTU, cmp, begin, end);
>
>   
Fixed.

>> +
>> +  emit_insn (gen_rtx_SET (VOIDmode, begin, gen_rtx_PLUS (Pmode, begin, inc)));
>>     
>
> Either:
>
>     emit_insn (gen_add3_insn (begin, begin, inc));
>
> or:
>
>     mips_emit_binary (PLUS, begin, begin, inc);
>
> Probably the latter, givne the GTU thing above.
>
>   
Fixed.  I chose the latter option as suggested.

>> +
>> +  label_ref = gen_rtx_LABEL_REF (VOIDmode, label);
>> +  cmp_result = gen_rtx_EQ (VOIDmode, cmp, gen_rtx_CONST_INT(SImode, 0));
>> +  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
>> +                               gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_result,
>> +                                                     label_ref,
>> +                                                     pc_rtx)));
>>     
>
> cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
> emit_jump_insn (gen_condjump (cmp_result, label));
>   
Fixed.

> All in all, very minor comments.  Thanks again for your work on this.
>   
OK, how about this new version.

INITIALIZE_TRAMPOLINE now uses the new clear_cache insn.  This means 
that it has to add the trampoline size to the ADDR argument before 
expanding clear_cache.  If clear_cache expands to a library call, it has 
to subtract BEGIN from END to get the length.  Looking at the generated 
code it appears that the RTL optimizers see that this 
addition/subtraction pair cancel each other out, and somewhat optimal 
code is generated.

Other than that and removing the __builtin_clear_cache_inline_p target 
hook as suggested by Mark Mitchell, this version of the patch only 
contains some of the cleanups you suggested.

Bootstrapped and regression tested  on: x86_64-unknown-linux-gnu all 
default languages.
i686-unknown-linux-gnu to mipsel-linux (--with-arch=mips32 and 
--with-arch-mips32r2) cross tested with --enable-languages-c,c++,java 
with no regressions.

OK to commit?

2007-07-08  David Daney  <ddaney@avtrex.com>

    * config/mips/mips.h (ISA_HAS_SYNCI): New target capability
    predicate.
    (INITIALIZE_TRAMPOLINE): Emit clear_cache insn instead of  library
    call.
    * config/mips/mips.c (mips_expand_synci_loop): New function.
    * config/mips/mips.md (UNSPEC_CLEAR_HAZARD): New constant.
    (UNSPEC_RDHWR): Same.
    (UNSPEC_SYNCI): Same.
    (UNSPEC_SYNC): Same.
    (clear_cache): New expand.
    (sync): New insn.
    (synci): Same.
    (rdhwr): Same.
    (clear_hazard): Same.
    * config/mips/mips-protos.h (mips_expand_synci_loop): Declare
    function.
    * testsuite/gcc.target/mips/clear-cache-1.c: New test.
    * testsuite/gcc.target/mips/clear-cache-2.c: New test.


[-- Attachment #2: flush-cache2.diff --]
[-- Type: text/x-patch, Size: 7653 bytes --]

Index: gcc/config/mips/mips.h
===================================================================
--- gcc/config/mips/mips.h	(revision 125997)
+++ gcc/config/mips/mips.h	(working copy)
@@ -770,6 +770,10 @@ extern const struct mips_rtx_cost_data *
 				 || ISA_MIPS32R2			\
 				 || ISA_MIPS64				\
 				 || TARGET_MIPS5500)
+
+/* ISA includes synci, jr.hb and jalr.hb */
+#define ISA_HAS_SYNCI ISA_MIPS32R2
+
 \f
 /* Add -G xx support.  */
 
@@ -2116,21 +2120,16 @@ typedef struct mips_args {
 
 #define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN)			    \
 {									    \
-  rtx func_addr, chain_addr;						    \
+  rtx func_addr, chain_addr, end_addr;                                      \
 									    \
   func_addr = plus_constant (ADDR, 32);					    \
   chain_addr = plus_constant (func_addr, GET_MODE_SIZE (ptr_mode));	    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, func_addr), FUNC);		    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, chain_addr), CHAIN);		    \
-									    \
-  /* Flush both caches.  We need to flush the data cache in case	    \
-     the system has a write-back cache.  */				    \
-  /* ??? Should check the return value for errors.  */			    \
-  if (mips_cache_flush_func && mips_cache_flush_func[0])		    \
-    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),   \
-		       0, VOIDmode, 3, ADDR, Pmode,			    \
-		       GEN_INT (TRAMPOLINE_SIZE), TYPE_MODE (integer_type_node),\
-		       GEN_INT (3), TYPE_MODE (integer_type_node));	    \
+  end_addr = gen_reg_rtx (Pmode);					    \
+  emit_insn (gen_add3_insn (end_addr, copy_rtx (ADDR),			    \
+                            GEN_INT (TRAMPOLINE_SIZE)));		    \
+  emit_insn (gen_clear_cache (copy_rtx (ADDR), end_addr));		    \
 }
 \f
 /* Addressing modes, and classification of registers for them.  */
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c	(revision 125997)
+++ gcc/config/mips/mips.c	(working copy)
@@ -3759,6 +3759,33 @@ mips_block_move_loop (rtx dest, rtx src,
     mips_block_move_straight (dest, src, leftover);
 }
 \f
+
+/* Expand a loop of synci insns for the address range [BEGIN, END).  */
+
+void
+mips_expand_synci_loop (rtx begin, rtx end)
+{
+  rtx inc, label, cmp, cmp_result;
+
+  /* Load INC with the cache line size (rdhwr INC,$1). */
+  inc = gen_reg_rtx (SImode);
+  emit_insn (gen_rdhwr (inc , const1_rtx));
+
+  /* Loop back to here.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  emit_insn (gen_synci (begin));
+
+  cmp = gen_reg_rtx (Pmode);
+  mips_emit_binary (GTU, cmp, begin, end);
+
+  mips_emit_binary (PLUS, begin, begin, inc);
+
+  cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
+  emit_jump_insn (gen_condjump (cmp_result, label));
+}
+\f
 /* Expand a movmemsi instruction.  */
 
 bool
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md	(revision 125997)
+++ gcc/config/mips/mips.md	(working copy)
@@ -50,6 +50,10 @@ (define_constants
    (UNSPEC_TLS_GET_TP		28)
    (UNSPEC_MFHC1		31)
    (UNSPEC_MTHC1		32)
+   (UNSPEC_CLEAR_HAZARD		33)
+   (UNSPEC_RDHWR		34)
+   (UNSPEC_SYNCI		35)
+   (UNSPEC_SYNC			36)
 
    (UNSPEC_ADDRESS_FIRST	100)
 
@@ -4171,6 +4175,69 @@ (define_insn "cprestore"
 }
   [(set_attr "type" "store")
    (set_attr "length" "4,12")])
+
+;; Expand in-line code to clear the instruction cache between operand[0] and
+;; operand[1].
+(define_expand "clear_cache"
+  [(match_operand 0 "pmode_register_operand")
+   (match_operand 1 "pmode_register_operand")]
+  ""
+  "
+{
+  if (ISA_HAS_SYNCI)
+    {
+      mips_expand_synci_loop (operands[0], operands[1]);
+      emit_insn (gen_sync ());
+      emit_insn (gen_clear_hazard ());
+    }
+  else if (mips_cache_flush_func && mips_cache_flush_func[0])
+    {
+      rtx len = gen_reg_rtx (SImode);
+      emit_insn (gen_sub3_insn (len, operands[1], operands[0]));
+      /* Flush both caches.  We need to flush the data cache in case
+         the system has a write-back cache.  */
+      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
+                         0, VOIDmode, 3, operands[0], Pmode,
+                         len, TYPE_MODE (integer_type_node),
+                         GEN_INT (3), TYPE_MODE (integer_type_node));
+   }
+  DONE;
+}")
+
+(define_insn "sync"
+  [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
+  "ISA_HAS_SYNCI"
+  "sync")
+
+(define_insn "synci"
+  [(unspec_volatile [(match_operand 0 "pmode_register_operand" "d")]
+		    UNSPEC_SYNCI)]
+  "ISA_HAS_SYNCI"
+  "synci\t0(%0)")
+
+(define_insn "rdhwr"
+  [(set (match_operand:SI 0 "general_operand" "=d")
+        (unspec_volatile [(match_operand:SI 1 "const_int_operand" "n")]
+        UNSPEC_RDHWR))]
+  "ISA_HAS_SYNCI"
+  "rdhwr\t%0,$%1")
+
+(define_insn "clear_hazard"
+  [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
+   (clobber (reg:SI 31))]
+  "ISA_HAS_SYNCI"
+{
+  return ".set\tpush\n"
+         "\t.set\tnoreorder\n"
+         "\t.set\tnomacro\n"
+         "\tbal\t1f\n"
+         "\tnop\n"
+         "1:\taddiu\t$31,$31,12\n"
+         "\tjr.hb\t$31\n"
+         "\tnop\n"
+         "\t.set\tpop";
+}
+  [(set_attr "length" "20")])
 \f
 ;; Block moves, see mips.c for more details.
 ;; Argument 0 is the destination
Index: gcc/config/mips/mips-protos.h
===================================================================
--- gcc/config/mips/mips-protos.h	(revision 125997)
+++ gcc/config/mips/mips-protos.h	(working copy)
@@ -1,6 +1,6 @@
 /* Prototypes of target machine for GNU compiler.  MIPS version.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   1999, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    Contributed by A. Lichnewsky (lich@inria.inria.fr).
    Changed by Michael Meissner	(meissner@osf.org).
    64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
@@ -185,6 +185,7 @@ extern void mips_expand_call (rtx, rtx, 
 extern void mips_emit_fcc_reload (rtx, rtx, rtx);
 extern void mips_set_return_address (rtx, rtx);
 extern bool mips_expand_block_move (rtx, rtx, rtx);
+extern void mips_expand_synci_loop (rtx, rtx);
 
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
Index: gcc/testsuite/gcc.target/mips/clear-cache-1.c
===================================================================
--- gcc/testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32r2" } */
+/* { dg-final { scan-assembler "synci" } } */
+/* { dg-final { scan-assembler "jr.hb" } } */
+/* { dg-final { scan-assembler-not "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+
Index: gcc/testsuite/gcc.target/mips/clear-cache-2.c
===================================================================
--- gcc/testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32" } */
+/* { dg-final { scan-assembler-not "synci" } } */
+/* { dg-final { scan-assembler-not "jr.hb" } } */
+/* { dg-final { scan-assembler "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 4/3 i386 target support for __builtin___clear_cache()
  2007-07-05  9:05 ` [Patch] 4/3 i386 target support for __builtin___clear_cache() David Daney
@ 2007-07-08 20:39   ` David Daney
  0 siblings, 0 replies; 34+ messages in thread
From: David Daney @ 2007-07-08 20:39 UTC (permalink / raw)
  To: gcc-patches

David Daney wrote:
> This patch adds support for __builtin___clear_cache() to the i386 (and 
> x86_64) target.
>
> There is not much to see here.  All I do is define the 
> TARGET_BUILTIN_CLEAR_CACHE_INLINE_P target hook to always return 
> false.  This causes __builtin___clear_cache() to be a nop for this 
> target.
>
> Tested with bootstrap and make -k check with no regressions on 
> x86_64-unknown-linux-gnu.
>
> OK to commit if the target independent portion of the patch is approved?
>
> 2007-07-04  David Daney  <ddaney@avtrex.com>
>
>    * config/i386/i386.h (TARGET_BUILTIN_CLEAR_CACHE_INLINE_P): Define
>    target hook.
>
This patch is withdrawn.  With the revised target independent portion of 
the patch, it is no longer needed.

David Daney

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-08 19:22                     ` David Daney
@ 2007-07-09 19:04                       ` Richard Sandiford
  2007-07-11  2:22                       ` Mark Mitchell
  1 sibling, 0 replies; 34+ messages in thread
From: Richard Sandiford @ 2007-07-09 19:04 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches, Mark Mitchell, Paolo Bonzini

FWIW, I like this.  I can't approve it, but since I've read it, here's a
few nitpicky comments.

David Daney <ddaney@avtrex.com> writes:
> +/* Expand a call to __builtin___clear_cache.  */
> +
> +static rtx
> +expand_builtin___clear_cache (tree exp ATTRIBUTE_UNUSED)
> +{
> +#ifndef HAVE_clear_cache
> +#ifdef CLEAR_INSN_CACHE
> +  /* There is no "clear_cache" insn, and __clear_cache() in libgcc
> +     does something.  Just do the default expansion to a call to __clear_cache().  */

line > 80 characters

> +  return NULL_RTX;
> +#else
> +  /* There is no "clear_cache" insn, and __clear_cache() in libgcc
> +     does nothing.  There is no need to call it.  Do nothing.  */
> +  return const0_rtx;
> +#endif /* CLEAR_INSN_CACHE */
> +#else
> +  /*  We have a "clear_cache" insn, and it will handle everything.  */

"/* We"

> +@deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
> +This functions is used to flush the processor's instruction cache for

"This function"

> +the region of memory between @var{begin} and @var{end}.  Some

It might be worth saying that BEGIN is inclusive and END is exclusive.

You also need to add a "clear_cache" entry to the "Standard Names"
section of md.texi.  The reason I noticed this is that the entry
should say that, if clear_cache is defined, HAVE_clear_cache can
only be false for targets that don't need a flush.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-08 20:00         ` David Daney
@ 2007-07-09 19:07           ` Richard Sandiford
  2007-07-11  5:45             ` David Daney
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Sandiford @ 2007-07-09 19:07 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches

Thanks for all the updates.  This patch looks good.

David Daney <ddaney@avtrex.com> writes:
> INITIALIZE_TRAMPOLINE now uses the new clear_cache insn.  This means 
> that it has to add the trampoline size to the ADDR argument before 
> expanding clear_cache.  If clear_cache expands to a library call, it has 
> to subtract BEGIN from END to get the length.  Looking at the generated 
> code it appears that the RTL optimizers see that this 
> addition/subtraction pair cancel each other out, and somewhat optimal 
> code is generated.

It's good to know that the sequence is optimised.  I agree this is
the way to go.  Just a few minor comments:

> +/* ISA includes synci, jr.hb and jalr.hb */

".  */"

> +  /* Load INC with the cache line size (rdhwr INC,$1). */
> +  inc = gen_reg_rtx (SImode);

Just for the record, this will eventually need to be Pmode too,
but we'd then need two rdhwr patterns.  I'll leave that to whoever
adds MIPS64r2 support though; there's no need to change the patch here.

> +  emit_insn (gen_rdhwr (inc , const1_rtx));

"inc, const1_rtx"

> +  else if (mips_cache_flush_func && mips_cache_flush_func[0])
> +    {
> +      rtx len = gen_reg_rtx (SImode);
> +      emit_insn (gen_sub3_insn (len, operands[1], operands[0]));

This too should be Pmode, since the two input operands are Pmode.
(AIUI, it's OK to pass a Pmode value to emit_library_call, even if
the argument is SImode.)  Please make this change, since it's
trivial in this context.

OK with those three changes.  They're all trivial, so if it compiles,
there's no need to retest.

Richard

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-08 19:22                     ` David Daney
  2007-07-09 19:04                       ` Richard Sandiford
@ 2007-07-11  2:22                       ` Mark Mitchell
  2007-07-11  4:47                         ` David Daney
  1 sibling, 1 reply; 34+ messages in thread
From: Mark Mitchell @ 2007-07-11  2:22 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches, Paolo Bonzini, rsandifo

David Daney wrote:

> My new implementation has three cases for __builtin___clear_cache():
> 
> 1) No "clear_cache" insn is supplied but the target does supply a
> definition for CLEAR_INSN_CACHE.  In this case (arm, m68k IIRC) the new
> builtin will expand to a call to the existing __clear_cache in libgcc.
> 
> 2) A "clear_cache" insn is supplied.  For this case, the builtin expands
> to said insn.  The mips portion of the patch currently has the only
> implemtation of "clear_cache".
> 
> 3) Neither "clear_cache" or CLEAR_INSN_CACHE is defined.  This case
> covers most GCC targets including but not limited to i386 and x86_64. 
> In this case the builtin expands to nothing, so it is a nop.

I think one could make an argument that for case 3)
__builtin___clear_cache should still expant to __clear_cache (just as
with case 1).  That would allow you to provide a new libgcc.so in future
and fix binaries that were trying to clear caches, but failing.

But, I don't think that's a very good argument.  It means that on a
system with no cache, you now have to explicit define a clear_cache
instruction (expanding to nothing) in order to say "no, there really is
no cache to clear here".

> 2007-07-08  David Daney  <ddaney@avtrex.com>
> 
>    * builtins.def (BUILT_IN_CLEAR_CACHE): New builtin.
>    * builtins.c (expand_builtin___clear_cache): New function.
>    (expand_builtin): Call expand_builtin___clear_cache for
>    BUILT_IN_CLEAR_CACHE case.
>    * doc/extend.texi (__builtin___clear_cache): Document new builtin.
>    * testsuite/gcc.dg/builtins-64.c: New test.

:REVIEWMAIL: OK

Thanks,

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 1/3 Add new builtin __builtin_flush_icache().
  2007-07-11  2:22                       ` Mark Mitchell
@ 2007-07-11  4:47                         ` David Daney
  0 siblings, 0 replies; 34+ messages in thread
From: David Daney @ 2007-07-11  4:47 UTC (permalink / raw)
  To: Mark Mitchell, gcc-patches; +Cc: Paolo Bonzini, rsandifo

[-- Attachment #1: Type: text/plain, Size: 1088 bytes --]

Mark Mitchell wrote:
> David Daney wrote:
>   
>> 2007-07-08  David Daney  <ddaney@avtrex.com>
>>
>>    * builtins.def (BUILT_IN_CLEAR_CACHE): New builtin.
>>    * builtins.c (expand_builtin___clear_cache): New function.
>>    (expand_builtin): Call expand_builtin___clear_cache for
>>    BUILT_IN_CLEAR_CACHE case.
>>    * doc/extend.texi (__builtin___clear_cache): Document new builtin.
>>    * testsuite/gcc.dg/builtins-64.c: New test.
>>     
>
> :REVIEWMAIL: OK
>
> Thanks,
>
>   
Thanks Mark,

This is the version I committed.  The only changes are punctuation in 
comments and the addition of the clear_cache documentation in md.texi.


2007-07-10  David Daney  <ddaney@avtrex.com>

    * builtins.def (BUILT_IN_CLEAR_CACHE): New builtin.
    * builtins.c (expand_builtin___clear_cache): New function.
    (expand_builtin): Call expand_builtin___clear_cache for
    BUILT_IN_CLEAR_CACHE case.
    * doc/extend.texi (__builtin___clear_cache): Document new builtin.
    * doc/md.texi (clear_cache): Document new instruction pattern.
    * testsuite/gcc.dg/builtins-64.c: New test.



[-- Attachment #2: flush-cache1.diff --]
[-- Type: text/x-patch, Size: 264 bytes --]

Sending        gcc/ChangeLog
Sending        gcc/builtins.c
Sending        gcc/builtins.def
Sending        gcc/doc/extend.texi
Sending        gcc/doc/md.texi
Adding         gcc/testsuite/gcc.dg/builtins-64.c
Transmitting file data ......
Committed revision 126535.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 2/3 MIPS support for builtin __builtin_flush_icache().
  2007-07-09 19:07           ` Richard Sandiford
@ 2007-07-11  5:45             ` David Daney
  0 siblings, 0 replies; 34+ messages in thread
From: David Daney @ 2007-07-11  5:45 UTC (permalink / raw)
  To: David Daney, gcc-patches, rsandifo

[-- Attachment #1: Type: text/plain, Size: 986 bytes --]

Richard Sandiford wrote:
>
> OK with those three changes.  They're all trivial, so if it compiles,
> there's no need to retest.
>   
Thanks for reviewing this Richard.

This is the version I committed.  The only changes are those you suggested.

David Daney

2007-07-10  David Daney  <ddaney@avtrex.com>

    * config/mips/mips.h (ISA_HAS_SYNCI): New target capability
    predicate.
    (INITIALIZE_TRAMPOLINE): Emit clear_cache insn instead of  library
    call.
    * config/mips/mips.c (mips_expand_synci_loop): New function.
    * config/mips/mips.md (UNSPEC_CLEAR_HAZARD): New constant.
    (UNSPEC_RDHWR): Same.
    (UNSPEC_SYNCI): Same.
    (UNSPEC_SYNC): Same.
    (clear_cache): New expand.
    (sync): New insn.
    (synci): Same.
    (rdhwr): Same.
    (clear_hazard): Same.
    * config/mips/mips-protos.h (mips_expand_synci_loop): Declare
    function.
    * testsuite/gcc.target/mips/clear-cache-1.c: New test.
    * testsuite/gcc.target/mips/clear-cache-2.c: New test.


[-- Attachment #2: flush-cache2.diff --]
[-- Type: text/x-patch, Size: 7581 bytes --]

Index: testsuite/gcc.target/mips/clear-cache-2.c
===================================================================
--- testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
+++ testsuite/gcc.target/mips/clear-cache-2.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32" } */
+/* { dg-final { scan-assembler-not "synci" } } */
+/* { dg-final { scan-assembler-not "jr.hb" } } */
+/* { dg-final { scan-assembler "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+
Index: testsuite/gcc.target/mips/clear-cache-1.c
===================================================================
--- testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
+++ testsuite/gcc.target/mips/clear-cache-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-mips-options "-O2 -mips32r2" } */
+/* { dg-final { scan-assembler "synci" } } */
+/* { dg-final { scan-assembler "jr.hb" } } */
+/* { dg-final { scan-assembler-not "_flush_cache" } } */
+
+void f()
+{
+  int size = 40;
+  char *memory = __builtin_alloca(size);
+  __builtin___clear_cache(memory, memory + size);
+}
+
Index: config/mips/mips.md
===================================================================
--- config/mips/mips.md	(revision 126536)
+++ config/mips/mips.md	(working copy)
@@ -50,6 +50,10 @@ (define_constants
    (UNSPEC_TLS_GET_TP		28)
    (UNSPEC_MFHC1		31)
    (UNSPEC_MTHC1		32)
+   (UNSPEC_CLEAR_HAZARD		33)
+   (UNSPEC_RDHWR		34)
+   (UNSPEC_SYNCI		35)
+   (UNSPEC_SYNC			36)
 
    (UNSPEC_ADDRESS_FIRST	100)
 
@@ -4221,6 +4225,69 @@ (define_insn "cprestore"
 }
   [(set_attr "type" "store")
    (set_attr "length" "4,12")])
+
+;; Expand in-line code to clear the instruction cache between operand[0] and
+;; operand[1].
+(define_expand "clear_cache"
+  [(match_operand 0 "pmode_register_operand")
+   (match_operand 1 "pmode_register_operand")]
+  ""
+  "
+{
+  if (ISA_HAS_SYNCI)
+    {
+      mips_expand_synci_loop (operands[0], operands[1]);
+      emit_insn (gen_sync ());
+      emit_insn (gen_clear_hazard ());
+    }
+  else if (mips_cache_flush_func && mips_cache_flush_func[0])
+    {
+      rtx len = gen_reg_rtx (Pmode);
+      emit_insn (gen_sub3_insn (len, operands[1], operands[0]));
+      /* Flush both caches.  We need to flush the data cache in case
+         the system has a write-back cache.  */
+      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),
+                         0, VOIDmode, 3, operands[0], Pmode,
+                         len, TYPE_MODE (integer_type_node),
+                         GEN_INT (3), TYPE_MODE (integer_type_node));
+   }
+  DONE;
+}")
+
+(define_insn "sync"
+  [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
+  "ISA_HAS_SYNCI"
+  "sync")
+
+(define_insn "synci"
+  [(unspec_volatile [(match_operand 0 "pmode_register_operand" "d")]
+		    UNSPEC_SYNCI)]
+  "ISA_HAS_SYNCI"
+  "synci\t0(%0)")
+
+(define_insn "rdhwr"
+  [(set (match_operand:SI 0 "general_operand" "=d")
+        (unspec_volatile [(match_operand:SI 1 "const_int_operand" "n")]
+        UNSPEC_RDHWR))]
+  "ISA_HAS_SYNCI"
+  "rdhwr\t%0,$%1")
+
+(define_insn "clear_hazard"
+  [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
+   (clobber (reg:SI 31))]
+  "ISA_HAS_SYNCI"
+{
+  return ".set\tpush\n"
+         "\t.set\tnoreorder\n"
+         "\t.set\tnomacro\n"
+         "\tbal\t1f\n"
+         "\tnop\n"
+         "1:\taddiu\t$31,$31,12\n"
+         "\tjr.hb\t$31\n"
+         "\tnop\n"
+         "\t.set\tpop";
+}
+  [(set_attr "length" "20")])
 \f
 ;; Block moves, see mips.c for more details.
 ;; Argument 0 is the destination
Index: config/mips/mips-protos.h
===================================================================
--- config/mips/mips-protos.h	(revision 126536)
+++ config/mips/mips-protos.h	(working copy)
@@ -1,6 +1,6 @@
 /* Prototypes of target machine for GNU compiler.  MIPS version.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   1999, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    Contributed by A. Lichnewsky (lich@inria.inria.fr).
    Changed by Michael Meissner	(meissner@osf.org).
    64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
@@ -187,6 +187,7 @@ extern void mips_expand_call (rtx, rtx, 
 extern void mips_emit_fcc_reload (rtx, rtx, rtx);
 extern void mips_set_return_address (rtx, rtx);
 extern bool mips_expand_block_move (rtx, rtx, rtx);
+extern void mips_expand_synci_loop (rtx, rtx);
 
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 126536)
+++ config/mips/mips.c	(working copy)
@@ -3883,6 +3883,33 @@ mips_block_move_loop (rtx dest, rtx src,
     mips_block_move_straight (dest, src, leftover);
 }
 \f
+
+/* Expand a loop of synci insns for the address range [BEGIN, END).  */
+
+void
+mips_expand_synci_loop (rtx begin, rtx end)
+{
+  rtx inc, label, cmp, cmp_result;
+
+  /* Load INC with the cache line size (rdhwr INC,$1). */
+  inc = gen_reg_rtx (SImode);
+  emit_insn (gen_rdhwr (inc, const1_rtx));
+
+  /* Loop back to here.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  emit_insn (gen_synci (begin));
+
+  cmp = gen_reg_rtx (Pmode);
+  mips_emit_binary (GTU, cmp, begin, end);
+
+  mips_emit_binary (PLUS, begin, begin, inc);
+
+  cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
+  emit_jump_insn (gen_condjump (cmp_result, label));
+}
+\f
 /* Expand a movmemsi instruction.  */
 
 bool
Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h	(revision 126536)
+++ config/mips/mips.h	(working copy)
@@ -804,6 +804,10 @@ extern const struct mips_rtx_cost_data *
 				 || ISA_MIPS32R2			\
 				 || ISA_MIPS64				\
 				 || TARGET_MIPS5500)
+
+/* ISA includes synci, jr.hb and jalr.hb.  */
+#define ISA_HAS_SYNCI ISA_MIPS32R2
+
 \f
 /* Add -G xx support.  */
 
@@ -2151,21 +2155,16 @@ typedef struct mips_args {
 
 #define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN)			    \
 {									    \
-  rtx func_addr, chain_addr;						    \
+  rtx func_addr, chain_addr, end_addr;                                      \
 									    \
   func_addr = plus_constant (ADDR, 32);					    \
   chain_addr = plus_constant (func_addr, GET_MODE_SIZE (ptr_mode));	    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, func_addr), FUNC);		    \
   emit_move_insn (gen_rtx_MEM (ptr_mode, chain_addr), CHAIN);		    \
-									    \
-  /* Flush both caches.  We need to flush the data cache in case	    \
-     the system has a write-back cache.  */				    \
-  /* ??? Should check the return value for errors.  */			    \
-  if (mips_cache_flush_func && mips_cache_flush_func[0])		    \
-    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func),   \
-		       0, VOIDmode, 3, ADDR, Pmode,			    \
-		       GEN_INT (TRAMPOLINE_SIZE), TYPE_MODE (integer_type_node),\
-		       GEN_INT (3), TYPE_MODE (integer_type_node));	    \
+  end_addr = gen_reg_rtx (Pmode);					    \
+  emit_insn (gen_add3_insn (end_addr, copy_rtx (ADDR),			    \
+                            GEN_INT (TRAMPOLINE_SIZE)));		    \
+  emit_insn (gen_clear_cache (copy_rtx (ADDR), end_addr));		    \
 }
 \f
 /* Addressing modes, and classification of registers for them.  */

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache.
  2007-07-05 19:08     ` Richard Sandiford
@ 2007-07-11 18:55       ` Tom Tromey
  0 siblings, 0 replies; 34+ messages in thread
From: Tom Tromey @ 2007-07-11 18:55 UTC (permalink / raw)
  To: David Daney; +Cc: gcc-patches, java-patches, rsandifo

>>>>> "Richard" == Richard Sandiford <rsandifo@nildram.co.uk> writes:

Richard> David Daney <ddaney@avtrex.com> writes:
>> A new version of the patch.  All that changed is the name of the 
>> builtin.  It is now __builtin___clear_cache().

Richard> Assuming this is considered part of the MIPS target...

Yes, target maintainers can approve patches to the corresponding
libffi port.

Tom

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2007-07-11 18:12 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-01  5:01 [Patch] 1/3 Add new builtin __builtin_flush_icache() David Daney
2007-07-01  5:05 ` [Patch] 2/3 MIPS support for " David Daney
2007-07-01 10:56   ` Richard Sandiford
2007-07-05  7:50     ` David Daney
2007-07-05 19:05       ` Richard Sandiford
2007-07-08 20:00         ` David Daney
2007-07-09 19:07           ` Richard Sandiford
2007-07-11  5:45             ` David Daney
2007-07-01  5:11 ` [Patch] 3/3 FFI: Use __builtin_flush_icache() to flush MIPS i-cache David Daney
2007-07-05  8:34   ` David Daney
2007-07-05 19:08     ` Richard Sandiford
2007-07-11 18:55       ` Tom Tromey
2007-07-01  8:01 ` [Patch] 1/3 Add new builtin __builtin_flush_icache() Paolo Bonzini
2007-07-01 17:51   ` Thiemo Seufer
2007-07-01 18:47   ` David Daney
2007-07-02  5:04     ` Paolo Bonzini
2007-07-03 23:55       ` Mark Mitchell
2007-07-04  7:18         ` Paolo Bonzini
2007-07-04 17:17           ` Mark Mitchell
2007-07-04 17:10         ` David Daney
2007-07-04 17:51           ` Mark Mitchell
2007-07-05  7:36             ` David Daney
2007-07-05 17:59               ` Richard Sandiford
2007-07-05 18:23                 ` David Daney
2007-07-05 19:11                   ` Richard Sandiford
2007-07-06  6:07               ` Mark Mitchell
2007-07-06  6:19                 ` David Daney
2007-07-06 16:39                   ` Mark Mitchell
2007-07-08 19:22                     ` David Daney
2007-07-09 19:04                       ` Richard Sandiford
2007-07-11  2:22                       ` Mark Mitchell
2007-07-11  4:47                         ` David Daney
2007-07-05  9:05 ` [Patch] 4/3 i386 target support for __builtin___clear_cache() David Daney
2007-07-08 20:39   ` David Daney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).