public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit
@ 2014-07-12  9:07 Uros Bizjak
  2014-07-14  8:05 ` Kyrill Tkachov
  0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2014-07-12  9:07 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 884 bytes --]

Hello!

The attached patch optimizes ix86_atomic_assign_expand_fenv by using the
register form "fnstsw %ax" instead of "fnstsw <mem>". This removes a
memory read/write cycle for a temporary.

2014-07-12  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386-builtin-types.def: Add USHORT_FTYPE_VOID.
    Remove VOID_FTYPE_PUSHORT.
    * config/i386/i386.c (bdesc_special_args) <__builtin_ia32_fnstsw>:
    Change code to USHORT_FTYPE_VOID.
    (ix86_expand_special_args_builtin): Handle USHORT_FTYPE_VOID.
    (ix86_expand_builtin): Remove IX86_BUILTIN_FNSTSW handling.
    (ix86_atomic_assign_expand_fenv): Update for
    __builtin_ia32_fnstsw changes.
    * config/i386/i386.md (x86_fnstsw_1): Set length unconditionally to 2.
    (fnstsw): Change operand 0 to nonimmediate operand.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} and
committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 7633 bytes --]

Index: config/alpha/alpha.c
===================================================================
--- config/alpha/alpha.c	(revision 212477)
+++ config/alpha/alpha.c	(working copy)
@@ -9888,6 +9888,72 @@ alpha_canonicalize_comparison (int *code, rtx *op0
       *op1 = GEN_INT (255);
     }
 }
+
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
+
+static void
+alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
+
+  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
+  tree new_fenv_var, reload_fenv, restore_fnenv;
+  tree update_call, atomic_feraiseexcept, hold_fnclex;
+
+  /* Assume OSF/1 compatible interfaces.  */
+  if (!TARGET_ABI_OSF)
+    return;
+
+  /* Generate the equivalent of :
+       unsigned long fenv_var;
+       fenv_var = __ieee_get_fp_control ();
+
+       unsigned long masked_fenv;
+       masked_fenv = fenv_var & mask;
+
+       __ieee_set_fp_control (masked_fenv);  */
+
+  fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  get_fpscr
+    = build_fn_decl ("__ieee_get_fp_control",
+		     build_function_type_list (long_unsigned_type_node, NULL));
+  set_fpscr
+    = build_fn_decl ("__ieee_set_fp_control",
+		     build_function_type_list (void_type_node, NULL));
+  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
+  ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
+		    fenv_var, build_call_expr (get_fpscr, 0));
+  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
+  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __ieee_set_fp_control (masked_fenv);  */
+
+  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
+
+  /* Generate the equivalent of :
+       unsigned long new_fenv_var;
+       new_fenv_var = __ieee_get_fp_control ();
+
+       __ieee_set_fp_control (fenv_var);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
+			build_call_expr (get_fpscr, 0));
+  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call
+    = build_call_expr (atomic_feraiseexcept, 1,
+		       fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
 \f
 /* Initialize the GCC target structure.  */
 #if TARGET_ABI_OPEN_VMS
@@ -10060,6 +10126,9 @@ alpha_canonicalize_comparison (int *code, rtx *op0
 #undef TARGET_CANONICALIZE_COMPARISON
 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 \f
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def	(revision 212477)
+++ config/i386/i386-builtin-types.def	(working copy)
@@ -162,6 +162,7 @@ DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
 DEF_FUNCTION_TYPE (FLOAT128)
 DEF_FUNCTION_TYPE (UINT64)
 DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (USHORT)
 DEF_FUNCTION_TYPE (INT)
 DEF_FUNCTION_TYPE (VOID)
 DEF_FUNCTION_TYPE (PVOID)
@@ -256,7 +257,6 @@ DEF_FUNCTION_TYPE (VOID, PCVOID)
 DEF_FUNCTION_TYPE (VOID, PVOID)
 DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
-DEF_FUNCTION_TYPE (VOID, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUNSIGNED)
 DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 212477)
+++ config/i386/i386.c	(working copy)
@@ -28994,7 +28994,7 @@ static const struct builtin_description bdesc_spec
   /* 80387 (for use internally for atomic compound assignment).  */
   { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
   { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
-  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
+  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
   { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
 
   /* MMX */
@@ -34598,6 +34598,7 @@ ix86_expand_special_args_builtin (const struct bui
       break;
 
     case INT_FTYPE_VOID:
+    case USHORT_FTYPE_VOID:
     case UINT64_FTYPE_VOID:
     case UNSIGNED_FTYPE_VOID:
       nargs = 0;
@@ -35283,7 +35284,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
     case IX86_BUILTIN_FXRSTOR64:
     case IX86_BUILTIN_FNSTENV:
     case IX86_BUILTIN_FLDENV:
-    case IX86_BUILTIN_FNSTSW:
       mode0 = BLKmode;
       switch (fcode)
 	{
@@ -35305,10 +35305,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
 	case IX86_BUILTIN_FLDENV:
 	  icode = CODE_FOR_fldenv;
 	  break;
-	case IX86_BUILTIN_FNSTSW:
-	  icode = CODE_FOR_fnstsw;
-	  mode0 = HImode;
-	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -46894,15 +46890,14 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *
 		      hold_fnclex);
       *clear = build_call_expr (fnclex, 0);
       tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
-      mark_addressable (sw_var);
-      tree su_ptr = build_pointer_type (short_unsigned_type_node);
-      tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
-      tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
+      tree fnstsw_call = build_call_expr (fnstsw, 0);
+      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
+			    sw_var, fnstsw_call);
       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
       tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
 				exceptions_var, exceptions_x87);
       *update = build2 (COMPOUND_EXPR, integer_type_node,
-			fnstsw_call, update_mod);
+			sw_mod, update_mod);
       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
     }
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 212477)
+++ config/i386/i386.md	(working copy)
@@ -1532,8 +1532,7 @@
 	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set (attr "length")
-	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+  [(set_attr "length" "2")
    (set_attr "mode" "SI")
    (set_attr "unit" "i387")])
 
@@ -18114,12 +18113,12 @@
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 
 (define_insn "fnstsw"
-  [(set (match_operand:HI 0 "memory_operand" "=m")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
 	(unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set_attr "type" "other")
-   (set_attr "memory" "store")
+  [(set_attr "type" "other,other")
+   (set_attr "memory" "none,store")
    (set (attr "length")
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit
  2014-07-12  9:07 [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit Uros Bizjak
@ 2014-07-14  8:05 ` Kyrill Tkachov
  2014-07-14  8:11   ` Uros Bizjak
  0 siblings, 1 reply; 3+ messages in thread
From: Kyrill Tkachov @ 2014-07-14  8:05 UTC (permalink / raw)
  To: Uros Bizjak, gcc-patches


On 12/07/14 10:07, Uros Bizjak wrote:
> Hello!
>
> Attached patch optimizes ix86_atomic_assign_expand_fenv by using
> register form of "fnstsw %ax" instead of "fnstsw <mem>". This way a
> memory RW cycle is removed for a temporary.

Hi Uros,

The patch you attached seems to be the hook implementation for alpha...

Kyrill

> 2014-07-12  Uros Bizjak  <ubizjak@gmail.com>
>
>      * config/i386/i386-builtin-types.def: Add USHORT_FTYPE_VOID.
>      Remove VOID_FTYPE_PUSHORT.
>      * config/i386/i386.c (bdesc_special_args) <__builtin_ia32_fnstsw>:
>      Change code to USHORT_FTYPE_VOID.
>      (ix86_expand_special_args_builtin): Handle USHORT_FTYPE_VOID.
>      (ix86_expand_builtin): Remove IX86_BUILTIN_FNSTSW handling.
>      (ix86_atomic_assign_expand_fenv): Update for
>      __builtin_ia32_fnstsw changes.
>      * config/i386/i386.md (x86_fnstsw_1): Set length unconditionally to 2.
>      (fnstsw): Change operand 0 to nonimmediate operand.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} and
> committed to mainline SVN.
>
> Uros.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit
  2014-07-14  8:05 ` Kyrill Tkachov
@ 2014-07-14  8:11   ` Uros Bizjak
  0 siblings, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2014-07-14  8:11 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: gcc-patches

On Mon, Jul 14, 2014 at 10:05 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:

>> Attached patch optimizes ix86_atomic_assign_expand_fenv by using
>> register form of "fnstsw %ax" instead of "fnstsw <mem>". This way a
>> memory RW cycle is removed for a temporary.
>
> The patch you attached seems to be the hook implementation for alpha...

Eh, the complete i386 patch follows the alpha hook. I diffed from the
config/ directory instead of config/i386.

Uros.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-07-14  8:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-12  9:07 [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit Uros Bizjak
2014-07-14  8:05 ` Kyrill Tkachov
2014-07-14  8:11   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).