From: Uros Bizjak <ubizjak@gmail.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH, i386]: Optimize ix86_atomic_assign_expand_fenv a bit
Date: Sat, 12 Jul 2014 09:07:00 -0000 [thread overview]
Message-ID: <CAFULd4aEXGU+3ZRTf9Hwh5uu6vX071MZnAw_Q9A1zAwiYL-R+g@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 884 bytes --]
Hello!
The attached patch optimizes ix86_atomic_assign_expand_fenv by using
the register form "fnstsw %ax" instead of "fnstsw <mem>". This
removes a memory read/write cycle that was needed only for a temporary.
2014-07-12 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386-builtin-types.def: Add USHORT_FTYPE_VOID.
Remove VOID_FTYPE_PUSHORT.
* config/i386/i386.c (bdesc_special_args) <__builtin_ia32_fnstsw>:
Change code to USHORT_FTYPE_VOID.
(ix86_expand_special_args_builtin): Handle USHORT_FTYPE_VOID.
(ix86_expand_builtin): Remove IX86_BUILTIN_FNSTSW handling.
(ix86_atomic_assign_expand_fenv): Update for
__builtin_ia32_fnstsw changes.
* config/i386/i386.md (x86_fnstsw_1): Set length unconditionally to 2.
(fnstsw): Change operand 0 to nonimmediate operand.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} and
committed to mainline SVN.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 7633 bytes --]
Index: config/alpha/alpha.c
===================================================================
--- config/alpha/alpha.c (revision 212477)
+++ config/alpha/alpha.c (working copy)
@@ -9888,6 +9888,72 @@ alpha_canonicalize_comparison (int *code, rtx *op0
*op1 = GEN_INT (255);
}
}
+
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
+
+static void
+alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+ const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
+
+ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
+ tree new_fenv_var, reload_fenv, restore_fnenv;
+ tree update_call, atomic_feraiseexcept, hold_fnclex;
+
+ /* Assume OSF/1 compatible interfaces. */
+ if (!TARGET_ABI_OSF)
+ return;
+
+ /* Generate the equivalent of :
+ unsigned long fenv_var;
+ fenv_var = __ieee_get_fp_control ();
+
+ unsigned long masked_fenv;
+ masked_fenv = fenv_var & mask;
+
+ __ieee_set_fp_control (masked_fenv); */
+
+ fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+ get_fpscr
+ = build_fn_decl ("__ieee_get_fp_control",
+ build_function_type_list (long_unsigned_type_node, NULL));
+ set_fpscr
+ = build_fn_decl ("__ieee_set_fp_control",
+ build_function_type_list (void_type_node, NULL));
+ mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
+ ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
+ fenv_var, build_call_expr (get_fpscr, 0));
+ masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
+ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
+ *hold = build2 (COMPOUND_EXPR, void_type_node,
+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+ hold_fnclex);
+
+ /* Store the value of masked_fenv to clear the exceptions:
+ __ieee_set_fp_control (masked_fenv); */
+
+ *clear = build_call_expr (set_fpscr, 1, masked_fenv);
+
+ /* Generate the equivalent of :
+ unsigned long new_fenv_var;
+ new_fenv_var = __ieee_get_fp_control ();
+
+ __ieee_set_fp_control (fenv_var);
+
+ __atomic_feraiseexcept (new_fenv_var); */
+
+ new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+ reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
+ build_call_expr (get_fpscr, 0));
+ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+ update_call
+ = build_call_expr (atomic_feraiseexcept, 1,
+ fold_convert (integer_type_node, new_fenv_var));
+ *update = build2 (COMPOUND_EXPR, void_type_node,
+ build2 (COMPOUND_EXPR, void_type_node,
+ reload_fenv, restore_fnenv), update_call);
+}
\f
/* Initialize the GCC target structure. */
#if TARGET_ABI_OPEN_VMS
@@ -10060,6 +10126,9 @@ alpha_canonicalize_comparison (int *code, rtx *op0
#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def (revision 212477)
+++ config/i386/i386-builtin-types.def (working copy)
@@ -162,6 +162,7 @@ DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (USHORT)
DEF_FUNCTION_TYPE (INT)
DEF_FUNCTION_TYPE (VOID)
DEF_FUNCTION_TYPE (PVOID)
@@ -256,7 +257,6 @@ DEF_FUNCTION_TYPE (VOID, PCVOID)
DEF_FUNCTION_TYPE (VOID, PVOID)
DEF_FUNCTION_TYPE (VOID, UINT64)
DEF_FUNCTION_TYPE (VOID, UNSIGNED)
-DEF_FUNCTION_TYPE (VOID, PUSHORT)
DEF_FUNCTION_TYPE (INT, PUSHORT)
DEF_FUNCTION_TYPE (INT, PUNSIGNED)
DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 212477)
+++ config/i386/i386.c (working copy)
@@ -28994,7 +28994,7 @@ static const struct builtin_description bdesc_spec
/* 80387 (for use internally for atomic compound assignment). */
{ 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
{ 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
- { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
+ { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
{ 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
/* MMX */
@@ -34598,6 +34598,7 @@ ix86_expand_special_args_builtin (const struct bui
break;
case INT_FTYPE_VOID:
+ case USHORT_FTYPE_VOID:
case UINT64_FTYPE_VOID:
case UNSIGNED_FTYPE_VOID:
nargs = 0;
@@ -35283,7 +35284,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
case IX86_BUILTIN_FXRSTOR64:
case IX86_BUILTIN_FNSTENV:
case IX86_BUILTIN_FLDENV:
- case IX86_BUILTIN_FNSTSW:
mode0 = BLKmode;
switch (fcode)
{
@@ -35305,10 +35305,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
case IX86_BUILTIN_FLDENV:
icode = CODE_FOR_fldenv;
break;
- case IX86_BUILTIN_FNSTSW:
- icode = CODE_FOR_fnstsw;
- mode0 = HImode;
- break;
default:
gcc_unreachable ();
}
@@ -46894,15 +46890,14 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *
hold_fnclex);
*clear = build_call_expr (fnclex, 0);
tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
- mark_addressable (sw_var);
- tree su_ptr = build_pointer_type (short_unsigned_type_node);
- tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
- tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
+ tree fnstsw_call = build_call_expr (fnstsw, 0);
+ tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
+ sw_var, fnstsw_call);
tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
exceptions_var, exceptions_x87);
*update = build2 (COMPOUND_EXPR, integer_type_node,
- fnstsw_call, update_mod);
+ sw_mod, update_mod);
tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
*update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
}
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 212477)
+++ config/i386/i386.md (working copy)
@@ -1532,8 +1532,7 @@
(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
"TARGET_80387"
"fnstsw\t%0"
- [(set (attr "length")
- (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+ [(set_attr "length" "2")
(set_attr "mode" "SI")
(set_attr "unit" "i387")])
@@ -18114,12 +18113,12 @@
(symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
(define_insn "fnstsw"
- [(set (match_operand:HI 0 "memory_operand" "=m")
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
(unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
"TARGET_80387"
"fnstsw\t%0"
- [(set_attr "type" "other")
- (set_attr "memory" "store")
+ [(set_attr "type" "other,other")
+ (set_attr "memory" "none,store")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
next reply other threads:[~2014-07-12 9:07 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-07-12 9:07 Uros Bizjak [this message]
2014-07-14 8:05 ` Kyrill Tkachov
2014-07-14 8:11 ` Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAFULd4aEXGU+3ZRTf9Hwh5uu6vX071MZnAw_Q9A1zAwiYL-R+g@mail.gmail.com \
--to=ubizjak@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).