public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit
@ 2014-07-12  9:07 Uros Bizjak
  2014-07-14  8:05 ` Kyrill Tkachov
  0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2014-07-12  9:07 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 884 bytes --]

Hello!

The attached patch optimizes ix86_atomic_assign_expand_fenv by using the
register form "fnstsw %ax" instead of "fnstsw <mem>". This removes the
memory store/load round trip through a temporary.

2014-07-12  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386-builtin-types.def: Add USHORT_FTYPE_VOID.
    Remove VOID_FTYPE_PUSHORT.
    * config/i386/i386.c (bdesc_special_args) <__builtin_ia32_fnstsw>:
    Change code to USHORT_FTYPE_VOID.
    (ix86_expand_special_args_builtin): Handle USHORT_FTYPE_VOID.
    (ix86_expand_builtin): Remove IX86_BUILTIN_FNSTSW handling.
    (ix86_atomic_assign_expand_fenv): Update for
    __builtin_ia32_fnstsw changes.
    * config/i386/i386.md (x86_fnstsw_1): Set length unconditionally to 2.
    (fnstsw): Change operand 0 to nonimmediate operand.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} and
committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 7633 bytes --]

Index: config/alpha/alpha.c
===================================================================
--- config/alpha/alpha.c	(revision 212477)
+++ config/alpha/alpha.c	(working copy)
@@ -9888,6 +9888,72 @@ alpha_canonicalize_comparison (int *code, rtx *op0
       *op1 = GEN_INT (255);
     }
 }
+
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
+
+static void
+alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
+
+  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
+  tree new_fenv_var, reload_fenv, restore_fnenv;
+  tree update_call, atomic_feraiseexcept, hold_fnclex;
+
+  /* Assume OSF/1 compatible interfaces.  */
+  if (!TARGET_ABI_OSF)
+    return;
+
+  /* Generate the equivalent of :
+       unsigned long fenv_var;
+       fenv_var = __ieee_get_fp_control ();
+
+       unsigned long masked_fenv;
+       masked_fenv = fenv_var & mask;
+
+       __ieee_set_fp_control (masked_fenv);  */
+
+  fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  get_fpscr
+    = build_fn_decl ("__ieee_get_fp_control",
+		     build_function_type_list (long_unsigned_type_node, NULL));
+  set_fpscr
+    = build_fn_decl ("__ieee_set_fp_control",
+		     build_function_type_list (void_type_node, NULL));
+  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
+  ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
+		    fenv_var, build_call_expr (get_fpscr, 0));
+  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
+  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __ieee_set_fp_control (masked_fenv);  */
+
+  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
+
+  /* Generate the equivalent of :
+       unsigned long new_fenv_var;
+       new_fenv_var = __ieee_get_fp_control ();
+
+       __ieee_set_fp_control (fenv_var);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
+			build_call_expr (get_fpscr, 0));
+  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call
+    = build_call_expr (atomic_feraiseexcept, 1,
+		       fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
 \f
 /* Initialize the GCC target structure.  */
 #if TARGET_ABI_OPEN_VMS
@@ -10060,6 +10126,9 @@ alpha_canonicalize_comparison (int *code, rtx *op0
 #undef TARGET_CANONICALIZE_COMPARISON
 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 \f
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def	(revision 212477)
+++ config/i386/i386-builtin-types.def	(working copy)
@@ -162,6 +162,7 @@ DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
 DEF_FUNCTION_TYPE (FLOAT128)
 DEF_FUNCTION_TYPE (UINT64)
 DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (USHORT)
 DEF_FUNCTION_TYPE (INT)
 DEF_FUNCTION_TYPE (VOID)
 DEF_FUNCTION_TYPE (PVOID)
@@ -256,7 +257,6 @@ DEF_FUNCTION_TYPE (VOID, PCVOID)
 DEF_FUNCTION_TYPE (VOID, PVOID)
 DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
-DEF_FUNCTION_TYPE (VOID, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUNSIGNED)
 DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 212477)
+++ config/i386/i386.c	(working copy)
@@ -28994,7 +28994,7 @@ static const struct builtin_description bdesc_spec
   /* 80387 (for use internally for atomic compound assignment).  */
   { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
   { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
-  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
+  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
   { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
 
   /* MMX */
@@ -34598,6 +34598,7 @@ ix86_expand_special_args_builtin (const struct bui
       break;
 
     case INT_FTYPE_VOID:
+    case USHORT_FTYPE_VOID:
     case UINT64_FTYPE_VOID:
     case UNSIGNED_FTYPE_VOID:
       nargs = 0;
@@ -35283,7 +35284,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
     case IX86_BUILTIN_FXRSTOR64:
     case IX86_BUILTIN_FNSTENV:
     case IX86_BUILTIN_FLDENV:
-    case IX86_BUILTIN_FNSTSW:
       mode0 = BLKmode;
       switch (fcode)
 	{
@@ -35305,10 +35305,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
 	case IX86_BUILTIN_FLDENV:
 	  icode = CODE_FOR_fldenv;
 	  break;
-	case IX86_BUILTIN_FNSTSW:
-	  icode = CODE_FOR_fnstsw;
-	  mode0 = HImode;
-	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -46894,15 +46890,14 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *
 		      hold_fnclex);
       *clear = build_call_expr (fnclex, 0);
       tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
-      mark_addressable (sw_var);
-      tree su_ptr = build_pointer_type (short_unsigned_type_node);
-      tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
-      tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
+      tree fnstsw_call = build_call_expr (fnstsw, 0);
+      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
+			    sw_var, fnstsw_call);
       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
       tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
 				exceptions_var, exceptions_x87);
       *update = build2 (COMPOUND_EXPR, integer_type_node,
-			fnstsw_call, update_mod);
+			sw_mod, update_mod);
       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
     }
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 212477)
+++ config/i386/i386.md	(working copy)
@@ -1532,8 +1532,7 @@
 	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set (attr "length")
-	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+  [(set_attr "length" "2")
    (set_attr "mode" "SI")
    (set_attr "unit" "i387")])
 
@@ -18114,12 +18113,12 @@
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 
 (define_insn "fnstsw"
-  [(set (match_operand:HI 0 "memory_operand" "=m")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
 	(unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set_attr "type" "other")
-   (set_attr "memory" "store")
+  [(set_attr "type" "other,other")
+   (set_attr "memory" "none,store")
    (set (attr "length")
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-07-14  8:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-12  9:07 [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit Uros Bizjak
2014-07-14  8:05 ` Kyrill Tkachov
2014-07-14  8:11   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).