public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
@ 2014-04-26 12:00 Kugan
  2014-04-28 10:03 ` Marcus Shawcroft
  2014-04-28 11:06 ` Ramana Radhakrishnan
  0 siblings, 2 replies; 12+ messages in thread
From: Kugan @ 2014-04-26 12:00 UTC (permalink / raw)
  To: gcc-patches; +Cc: Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 997 bytes --]

Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.

This implementation is based on SPARC and i386 implementations.

Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
regression. Is this OK for trunk?

Thanks,
Kugan

gcc/
+2014-04-27  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
+	define.
+	* config/aarch64/aarch64-builtins.c (arm_builtins) : Add
+	AARCH64_BUILTIN_LDFPSCR and AARCH64_BUILTIN_STFPSCR.
+	(aarch64_init_builtins) : Initialize builtins
+	__builtins_aarch64_stfpscr and __builtins_aarch64_ldfpscr.
+	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_stfpscr
+	and __builtins_aarch64_ldfpscr.
+	(aarch64_atomic_assign_expand_fenv): New function.
+	* config/aarch64/aarch64.md (stfpscr): New pattern.
+	(ldfpscr) : Likewise.
+	(unspecv): Add UNSPECV_LDFPSCR and UNSPECV_STFPSCR.
+






[-- Attachment #2: aarch64.txt --]
[-- Type: text/plain, Size: 6485 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..70d3efa 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -371,6 +371,10 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
+
+  AARCH64_BUILTIN_LDFPSCR,
+  AARCH64_BUILTIN_STFPSCR,
+
   AARCH64_SIMD_BUILTIN_BASE,
 #include "aarch64-simd-builtins.def"
   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
@@ -752,6 +756,18 @@ aarch64_init_simd_builtins (void)
 void
 aarch64_init_builtins (void)
 {
+  tree ftype_stfpscr
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+  tree ftype_ldfpscr
+    = build_function_type_list (unsigned_type_node, NULL);
+
+  aarch64_builtin_decls[AARCH64_BUILTIN_LDFPSCR]
+    = add_builtin_function ("__builtin_aarch64_ldfscr", ftype_ldfpscr,
+			    AARCH64_BUILTIN_LDFPSCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STFPSCR]
+    = add_builtin_function ("__builtin_aarch64_stfscr", ftype_stfpscr,
+			    AARCH64_BUILTIN_STFPSCR, BUILT_IN_MD, NULL, NULL_TREE);
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 }
@@ -964,6 +980,31 @@ aarch64_expand_builtin (tree exp,
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   int fcode = DECL_FUNCTION_CODE (fndecl);
+  int icode;
+  rtx pat, op0;
+  tree arg0;
+
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_LDFPSCR:
+    case AARCH64_BUILTIN_STFPSCR:
+      if (fcode == AARCH64_BUILTIN_LDFPSCR)
+	{
+	  icode = CODE_FOR_ldfpscr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = CODE_FOR_stfpscr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+    }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
     return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1237,70 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return changed;
 }
 
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned FE_INVALID = 1;
+  const unsigned FE_DIVBYZERO = 2;
+  const unsigned FE_OVERFLOW = 4;
+  const unsigned FE_UNDERFLOW = 8;
+  const unsigned FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT FE_ALL_EXCEPT = (FE_INVALID | FE_DIVBYZERO
+						| FE_OVERFLOW | FE_UNDERFLOW
+					| FE_INEXACT);
+  const unsigned HOST_WIDE_INT FE_EXCEPT_SHIFT = 8;
+
+  /* Genareate the equivalence of :
+       unsigned int fenv_var;
+       fenv_var = __builtin_aarch64_ldfpscr ();
+
+       unsigned int masked_fenv;
+       tmp1_var = fenv_var & ~ mask;
+
+       __builtin_aarch64_fpscr (&tmp1_var);  */
+
+  tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  tree ldfpscr = aarch64_builtin_decls[AARCH64_BUILTIN_LDFPSCR];
+  tree stfpscr = aarch64_builtin_decls[AARCH64_BUILTIN_STFPSCR];
+  tree mask = build_int_cst (unsigned_type_node,
+			     ~((FE_ALL_EXCEPT << FE_EXCEPT_SHIFT)
+			       | FE_ALL_EXCEPT));
+  tree ld_fenv_stmt = build2 (MODIFY_EXPR, unsigned_type_node,
+			      fenv_var, build_call_expr (ldfpscr, 0));
+  tree masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
+  tree hold_fnclex = build_call_expr (stfpscr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
+			  ld_fenv_stmt), hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_aarch64_stfpscr (masked_fenv);  */
+
+  *clear = build_call_expr (stfpscr, 1, masked_fenv);
+
+  /* Generate the equivalent of :
+       unsigned int tmp2_var;
+       tmp_var = __builtin_aarch64_fpscr ();
+
+       __builtin_aarch64_stfpscr (fenv_var);
+
+       __atomic_feraiseexcept (tmp_var);  */
+
+  tree tmp_var = create_tmp_var (unsigned_type_node, NULL);
+  tree reload_fenv_stmt = build2 (MODIFY_EXPR, unsigned_type_node,
+				  tmp_var, build_call_expr (ldfpscr, 0));
+  tree restore_fnenv = build_call_expr (stfpscr, 1, fenv_var);
+  tree atomic_feraiseexcept
+    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  tree update_call
+    = build_call_expr (atomic_feraiseexcept, 1,
+		       fold_convert (integer_type_node, tmp_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv_stmt, restore_fnenv), update_call);
+}
+
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3147ee..0f5ea48 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -332,6 +332,8 @@ static const char * const aarch64_condition_codes[] =
   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 };
 
+void aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
+
 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 unsigned
 aarch64_dbx_register_number (unsigned regno)
@@ -8488,6 +8490,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   aarch64_autovectorize_vector_sizes
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+  aarch64_atomic_assign_expand_fenv
+
 /* Section anchor support.  */
 
 #undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c86a29d..e916ff5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -107,6 +107,8 @@
 
 (define_c_enum "unspecv" [
     UNSPECV_EH_RETURN		; Represent EH_RETURN
+    UNSPECV_LDFPSCR		; load floating point status and control register.
+    UNSPECV_STFPSCR		; store floating point status and control register.
   ]
 )
 
@@ -3635,6 +3637,21 @@
   DONE;
 })
 
+;; Write Floating-point Status Register.
+(define_insn "stfpscr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_STFPSCR)]
+  ""
+  "msr\\tfpsr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "ldfpscr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_LDFPSCR))]
+  ""
+  "mrs\\t%0, fpsr"
+  [(set_attr "type" "mrs")])
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-04-26 12:00 [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook Kugan
@ 2014-04-28 10:03 ` Marcus Shawcroft
  2014-04-28 11:06 ` Ramana Radhakrishnan
  1 sibling, 0 replies; 12+ messages in thread
From: Marcus Shawcroft @ 2014-04-28 10:03 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

Hi Kugan, Thanks for this, couple of comments inline:

On 26 April 2014 11:57, Kugan <kugan.vivekanandarajah@linaro.org> wrote:

> gcc/
> +2014-04-27  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +       * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> +       define.
> +       * config/aarch64/aarch64-builtins.c (arm_builtins) : Add

aarch64_builtins ?

> +       AARCH64_BUILTIN_LDFPSCR and AARCH64_BUILTIN_STFPSCR.

AArch32 has the traditional combined FPSCR, but AArch64 splits this
register into FPSR and FPCR therefore I think AARCH64_BUILTIN_GET_FPCR
and AARCH64_BUILTIN_SET_FPCR are more appropriate names.  Likewise
subsequent references to FPSCR in this patch should change to FPCR.

> +       (aarch64_init_builtins) : Initialize builtins
> +       __builtins_aarch64_stfpscr and __builtins_aarch64_ldfpscr.
> +       (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_stfpscr
> +       and __builtins_aarch64_ldfpscr.
> +       (aarch64_atomic_assign_expand_fenv): New function.
> +       * config/aarch64/aarch64.md (stfpscr): New pattern.
> +       (ldfpscr) : Likewise.
> +       (unspecv): Add UNSPECV_LDFPSCR and UNSPECV_STFPSCR.
> +

+  aarch64_builtin_decls[AARCH64_BUILTIN_LDFPSCR]
+    = add_builtin_function ("__builtin_aarch64_ldfscr", ftype_ldfpscr,

I'd prefer __builtin_aarch64_get_fpcr and __builtin_aarch64_set_fpcr.

We should document them in doc/extend.texi

+  const unsigned HOST_WIDE_INT FE_ALL_EXCEPT = (FE_INVALID | FE_DIVBYZERO
+ | FE_OVERFLOW | FE_UNDERFLOW
+ | FE_INEXACT);

Indentation is funny here..

+  /* Genareate the equivalence of :

Spelling.

+  tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  tree ldfpscr = aarch64_builtin_decls[AARCH64_BUILTIN_LDFPSCR];
+  tree stfpscr = aarch64_builtin_decls[AARCH64_BUILTIN_STFPSCR];

Move the declarations to the top of the function please.

+void aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
+

Drop the argument names and relocate to aarch64-protos.h please.

+    UNSPECV_LDFPSCR ; load floating point status and control register.

It isn't a status register, how about:

UNSPECV_GET_FPCR ; Represent fetch of FPCR content.

Cheers
/Marcus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-04-26 12:00 [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook Kugan
  2014-04-28 10:03 ` Marcus Shawcroft
@ 2014-04-28 11:06 ` Ramana Radhakrishnan
  2014-04-29  3:03   ` Kugan
  1 sibling, 1 reply; 12+ messages in thread
From: Ramana Radhakrishnan @ 2014-04-28 11:06 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

On 04/26/14 11:57, Kugan wrote:
> Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
> With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.
>
> This implementation is based on SPARC and i386 implementations.
>
> Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
> regression. Is this OK for trunk?

Again like A32 please test on hardware to make sure this behaves 
correctly with c11-atomic-exec-5.c .

If you don't have access to hardware, let us know : we'll take it for a 
spin once you update the patch according to Marcus's comments.

regards
Ramana

>
> Thanks,
> Kugan
>
> gcc/
> +2014-04-27  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> +	define.
> +	* config/aarch64/aarch64-builtins.c (arm_builtins) : Add
> +	AARCH64_BUILTIN_LDFPSCR and AARCH64_BUILTIN_STFPSCR.
> +	(aarch64_init_builtins) : Initialize builtins
> +	__builtins_aarch64_stfpscr and __builtins_aarch64_ldfpscr.
> +	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_stfpscr
> +	and __builtins_aarch64_ldfpscr.
> +	(aarch64_atomic_assign_expand_fenv): New function.
> +	* config/aarch64/aarch64.md (stfpscr): New pattern.
> +	(ldfpscr) : Likewise.
> +	(unspecv): Add UNSPECV_LDFPSCR and UNSPECV_STFPSCR.
> +
>
>
>
>


-- 
Ramana Radhakrishnan
Principal Engineer
ARM Ltd.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-04-28 11:06 ` Ramana Radhakrishnan
@ 2014-04-29  3:03   ` Kugan
  2014-05-02  9:08     ` Kugan
  2014-05-02 10:06     ` Marcus Shawcroft
  0 siblings, 2 replies; 12+ messages in thread
From: Kugan @ 2014-04-29  3:03 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 2723 bytes --]


On 28/04/14 21:01, Ramana Radhakrishnan wrote:
> On 04/26/14 11:57, Kugan wrote:
>> Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
>> With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.
>>
>> This implementation is based on SPARC and i386 implementations.
>>
>> Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
>> regression. Is this OK for trunk?
> 
> Again like A32 please test on hardware to make sure this behaves
> correctly with c11-atomic-exec-5.c .
> 
> If you don't have access to hardware, let us know : we'll take it for a
> spin once you update the patch according to Marcus's comments.
> 

Thanks for the review. I have updated the patch. I have also updated
hold, clear and update to match exactly what feholdexcpt.c, fclrexcpt.c
and feupdateenv.c in glibc/ports/sysdeps/aarch64/fpu do.

I have limited access to real hardware, so I only did a bootstrap and ran
c11-atomic-exec-5.c on its own to make sure that it passes. I have also
regression tested again on qemu-aarch64 for aarch64-none-linux-gnu with
no new regressions. I would appreciate it if you could do the regression
testing on real hardware.

As for the ARM version of the patch, I tested the previous version with
c11-atomic-exec-5.c and verified it on a Chromebook before I posted
the patch. I have now updated the patch based on your review, and the
full bootstrap and regression testing is now under way. I will post the
patch once the results are available.

Thanks,
Kugan

+2014-04-29  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
+	define.
+	* config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
+	New function declaration.
+	* config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
+	AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
+	AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
+	(aarch64_init_builtins) : Initialize builtins
+	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
+	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
+	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
+	__builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
+	and __builtins_aarch64_set_fpsr.
+	(aarch64_atomic_assign_expand_fenv): New function.
+	* config/aarch64/aarch64.md (set_fpcr): New pattern.
+	(get_fpcr) : Likewise.
+	(set_fpsr) : Likewise.
+	(get_fpsr) : Likewise.
+	(unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
+	 and UNSPECV_SET_FPSR.
+	* doc/extend.texi (AARCH64 Built-in Functions) : Document
+	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
+	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.



[-- Attachment #2: aarch64.txt --]
[-- Type: text/plain, Size: 10318 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..5cdc978 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
+
+  AARCH64_BUILTIN_GET_FPCR,
+  AARCH64_BUILTIN_SET_FPCR,
+  AARCH64_BUILTIN_GET_FPSR,
+  AARCH64_BUILTIN_SET_FPSR,
+
   AARCH64_SIMD_BUILTIN_BASE,
 #include "aarch64-simd-builtins.def"
   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
@@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void)
 void
 aarch64_init_builtins (void)
 {
+  tree ftype_set_fpr
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+  tree ftype_get_fpr
+    = build_function_type_list (unsigned_type_node, NULL);
+
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 }
@@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp,
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   int fcode = DECL_FUNCTION_CODE (fndecl);
+  int icode;
+  rtx pat, op0;
+  tree arg0;
+
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_GET_FPCR:
+    case AARCH64_BUILTIN_SET_FPCR:
+    case AARCH64_BUILTIN_GET_FPSR:
+    case AARCH64_BUILTIN_SET_FPSR:
+      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
+	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
+	{
+	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
+	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+    }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
     return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1250,103 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return changed;
 }
 
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned FE_INVALID = 1;
+  const unsigned FE_DIVBYZERO = 2;
+  const unsigned FE_OVERFLOW = 4;
+  const unsigned FE_UNDERFLOW = 8;
+  const unsigned FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT FE_ALL_EXCEPT = (FE_INVALID | FE_DIVBYZERO
+						| FE_OVERFLOW | FE_UNDERFLOW
+						| FE_INEXACT);
+  const unsigned HOST_WIDE_INT FE_EXCEPT_SHIFT = 8;
+  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+  tree hold_fnclex_sr, tmp_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+  /* Generate the equivalence of :
+       unsigned int fenv_cr;
+       fenv_cr = __builtin_aarch64_get_fpcr ();
+
+       unsigned int fenv_sr;
+       fenv_sr = __builtin_aarch64_get_fpsr ();
+
+       Now set all exceptions to non-stop
+       unsigned int mask_cr = ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
+       unsigned int masked_cr;
+       masked_cr = fenv_cr & mask_cr;
+
+       And clear all exception flags
+       unsigned int mask_sr = ~FE_ALL_EXCEPT;
+       unsigned int masked_sr;
+       masked_sr = fenv_sr & mask_sr;
+
+       __builtin_aarch64_set_fpcr (masked_cr);
+       __builtin_aarch64_set_fpsr (masked_sr);  */
+
+  fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+  fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+  mask_cr = build_int_cst (unsigned_type_node,
+			   ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT));
+  mask_sr = build_int_cst (unsigned_type_node,
+			   ~(FE_ALL_EXCEPT));
+
+  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_cr, build_call_expr (get_fpcr, 0));
+  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_sr, build_call_expr (get_fpsr, 0));
+
+  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+			hold_fnclex_sr);
+  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+			masked_fenv_sr);
+  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_aarch64_set_fpsr (masked_sr);  */
+
+  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  /* Generate the equivalent of :
+       unsigned int tmp_var;
+       tmp_var = __builtin_aarch64_get_fpsr ();
+
+       __builtin_aarch64_set_fpsr (fenv_sr);
+
+       __atomic_feraiseexcept (tmp_var);  */
+
+  tmp_var = create_tmp_var (unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+			tmp_var, build_call_expr (get_fpsr, 0));
+  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node, tmp_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5542f02..f4f3f61 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -289,4 +289,5 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
 extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 extern bool
 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3147ee..fbbdc23 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8488,6 +8488,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   aarch64_autovectorize_vector_sizes
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+  aarch64_atomic_assign_expand_fenv
+
 /* Section anchor support.  */
 
 #undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c86a29d..24f235f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -107,6 +107,10 @@
 
 (define_c_enum "unspecv" [
     UNSPECV_EH_RETURN		; Represent EH_RETURN
+    UNSPECV_GET_FPCR		; Represent fetch of FPCR content.
+    UNSPECV_SET_FPCR		; Represent assign of FPCR content.
+    UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
+    UNSPECV_SET_FPSR		; Represent assign of FPSR content.
   ]
 )
 
@@ -3635,6 +3639,37 @@
   DONE;
 })
 
+;; Write Floating-point Control Register.
+(define_insn "set_fpcr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+  ""
+  "msr\\tfpcr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Control Register.
+(define_insn "get_fpcr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
+  ""
+  "mrs\\t%0, fpcr"
+  [(set_attr "type" "mrs")])
+
+;; Write Floating-point Status Register.
+(define_insn "set_fpsr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+  ""
+  "msr\\tfpsr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "get_fpsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  ""
+  "mrs\\t%0, fpsr"
+  [(set_attr "type" "mrs")])
+
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 347a94a..8bd13f3 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9107,6 +9107,7 @@ to those machines.  Generally these generate calls to specific machine
 instructions, but allow the compiler to schedule those calls.
 
 @menu
+* AARCH64 Built-in Functions::
 * Alpha Built-in Functions::
 * Altera Nios II Built-in Functions::
 * ARC Built-in Functions::
@@ -9139,6 +9140,18 @@ instructions, but allow the compiler to schedule those calls.
 * TILEPro Built-in Functions::
 @end menu
 
+@node AARCH64 Built-in Functions
+@subsection AARCH64 Built-in Functions
+
+These built-in functions are available for the AARCH64 family of
+processors.
+@smallexample
+unsigned int __builtin_aarch64_get_fpcr ()
+void __builtin_aarch64_set_fpcr (unsigned int)
+unsigned int __builtin_aarch64_get_fpsr ()
+void __builtin_aarch64_set_fpsr (unsigned int)
+@end smallexample
+
 @node Alpha Built-in Functions
 @subsection Alpha Built-in Functions
 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-04-29  3:03   ` Kugan
@ 2014-05-02  9:08     ` Kugan
  2014-05-02 11:58       ` Yufeng Zhang
  2014-05-02 10:06     ` Marcus Shawcroft
  1 sibling, 1 reply; 12+ messages in thread
From: Kugan @ 2014-05-02  9:08 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1424 bytes --]

> 
> +2014-04-29  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> +	define.
> +	* config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
> +	New function declaration.
> +	* config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
> +	AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
> +	AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
> +	(aarch64_init_builtins) : Initialize builtins
> +	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> +	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
> +	__builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
> +	and __builtins_aarch64_set_fpsr.
> +	(aarch64_atomic_assign_expand_fenv): New function.
> +	* config/aarch64/aarch64.md (set_fpcr): New pattern.
> +	(get_fpcr) : Likewise.
> +	(set_fpsr) : Likewise.
> +	(get_fpsr) : Likewise.
> +	(unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
> +	 and UNSPECV_SET_FPSR.
> +	* doc/extend.texi (AARCH64 Built-in Functions) : Document
> +	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> 

The updated patch is based on the review at
http://gcc.gnu.org/ml/gcc-patches/2014-05/msg00041.html. The FE_* values are
now renamed to AARCH64_FE_*.

Thanks,
Kugan

[-- Attachment #2: aarch64_v3.txt --]
[-- Type: text/plain, Size: 10517 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..40d53b1 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
+
+  AARCH64_BUILTIN_GET_FPCR,
+  AARCH64_BUILTIN_SET_FPCR,
+  AARCH64_BUILTIN_GET_FPSR,
+  AARCH64_BUILTIN_SET_FPSR,
+
   AARCH64_SIMD_BUILTIN_BASE,
 #include "aarch64-simd-builtins.def"
   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
@@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void)
 void
 aarch64_init_builtins (void)
 {
+  tree ftype_set_fpr
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+  tree ftype_get_fpr
+    = build_function_type_list (unsigned_type_node, NULL);
+
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 }
@@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp,
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   int fcode = DECL_FUNCTION_CODE (fndecl);
+  int icode;
+  rtx pat, op0;
+  tree arg0;
+
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_GET_FPCR:
+    case AARCH64_BUILTIN_SET_FPCR:
+    case AARCH64_BUILTIN_GET_FPSR:
+    case AARCH64_BUILTIN_SET_FPSR:
+      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
+	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
+	{
+	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
+	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+    }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
     return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return changed;
 }
 
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned AARCH64_FE_INVALID = 1;
+  const unsigned AARCH64_FE_DIVBYZERO = 2;
+  const unsigned AARCH64_FE_OVERFLOW = 4;
+  const unsigned AARCH64_FE_UNDERFLOW = 8;
+  const unsigned AARCH64_FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
+							| AARCH64_FE_DIVBYZERO
+							| AARCH64_FE_OVERFLOW
+							| AARCH64_FE_UNDERFLOW
+							| AARCH64_FE_INEXACT);
+  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
+  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+  /* Generate the equivalent of :
+       unsigned int fenv_cr;
+       fenv_cr = __builtin_aarch64_get_fpcr ();
+
+       unsigned int fenv_sr;
+       fenv_sr = __builtin_aarch64_get_fpsr ();
+
+       Now set all exceptions to non-stop
+       unsigned int mask_cr
+		= ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
+       unsigned int masked_cr;
+       masked_cr = fenv_cr & mask_cr;
+
+       And clear all exception flags
+       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
+       unsigned int masked_sr;
+       masked_sr = fenv_sr & mask_sr;
+
+       __builtin_aarch64_set_fpcr (masked_cr);
+       __builtin_aarch64_set_fpsr (masked_sr);  */
+
+  fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+  fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+  mask_cr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
+  mask_sr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT));
+
+  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_cr, build_call_expr (get_fpcr, 0));
+  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_sr, build_call_expr (get_fpsr, 0));
+
+  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+			hold_fnclex_sr);
+  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+			masked_fenv_sr);
+  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */
+
+  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  /* Generate the equivalent of :
+       unsigned int new_fenv_var;
+       new_fenv_var = __builtin_aarch64_get_fpsr ();
+
+       __builtin_aarch64_set_fpsr (fenv_sr);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+			new_fenv_var, build_call_expr (get_fpsr, 0));
+  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5542f02..f4f3f61 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -289,4 +289,5 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
 extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 extern bool
 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3147ee..fbbdc23 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8488,6 +8488,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   aarch64_autovectorize_vector_sizes
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+  aarch64_atomic_assign_expand_fenv
+
 /* Section anchor support.  */
 
 #undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c86a29d..24f235f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -107,6 +107,10 @@
 
 (define_c_enum "unspecv" [
     UNSPECV_EH_RETURN		; Represent EH_RETURN
+    UNSPECV_GET_FPCR		; Represent fetch of FPCR content.
+    UNSPECV_SET_FPCR		; Represent assign of FPCR content.
+    UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
+    UNSPECV_SET_FPSR		; Represent assign of FPSR content.
   ]
 )
 
@@ -3635,6 +3639,37 @@
   DONE;
 })
 
+;; Write Floating-point Control Register.
+(define_insn "set_fpcr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+  ""
+  "msr\\tfpcr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Control Register.
+(define_insn "get_fpcr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
+  ""
+  "mrs\\t%0, fpcr"
+  [(set_attr "type" "mrs")])
+
+;; Write Floating-point Status Register.
+(define_insn "set_fpsr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+  ""
+  "msr\\tfpsr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "get_fpsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  ""
+  "mrs\\t%0, fpsr"
+  [(set_attr "type" "mrs")])
+
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 347a94a..8bd13f3 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9107,6 +9107,7 @@ to those machines.  Generally these generate calls to specific machine
 instructions, but allow the compiler to schedule those calls.
 
 @menu
+* AARCH64 Built-in Functions::
 * Alpha Built-in Functions::
 * Altera Nios II Built-in Functions::
 * ARC Built-in Functions::
@@ -9139,6 +9140,18 @@ instructions, but allow the compiler to schedule those calls.
 * TILEPro Built-in Functions::
 @end menu
 
+@node AARCH64 Built-in Functions
+@subsection AARCH64 Built-in Functions
+
+These built-in functions are available for the AARCH64 family of
+processors.
+@smallexample
+unsigned int __builtin_aarch64_get_fpcr ()
+void __builtin_aarch64_set_fpcr (unsigned int)
+unsigned int __builtin_aarch64_get_fpsr ()
+void __builtin_aarch64_set_fpsr (unsigned int)
+@end smallexample
+
 @node Alpha Built-in Functions
 @subsection Alpha Built-in Functions
 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-04-29  3:03   ` Kugan
  2014-05-02  9:08     ` Kugan
@ 2014-05-02 10:06     ` Marcus Shawcroft
  2014-05-02 10:15       ` Marcus Shawcroft
  2014-05-02 12:27       ` Kugan
  1 sibling, 2 replies; 12+ messages in thread
From: Marcus Shawcroft @ 2014-05-02 10:06 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches

On 29 April 2014 03:37, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>
> On 28/04/14 21:01, Ramana Radhakrishnan wrote:
>> On 04/26/14 11:57, Kugan wrote:
>>> Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
>>> With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.
>>>
>>> This implementation is based on SPARC and i386 implementations.
>>>
>>> Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
>>> regression. Is this OK for trunk?
>>
>> Again like A32 please test on hardware to make sure this behaves
>> correctly with c11-atomic-exec-5.c .
>>
>> If you don't have access to hardware, let us know : we'll take it for a
>> spin once you update the patch according to Marcus's comments.
>>
>
> Thanks for the review. I have updated the patch. I also have updated
> hold, clear and update to be exactly as in feholdexcpt.c, fclrexcpt.c
> and feupdateenv.c of glibc/ports/sysdeps/aarch64/fpu.
>

Kugan, I've not looked at the respin in detail yet, but it has just
occurred to me that the sequence used here to set FPCR is
insufficient.  The architecture reference manual requires that any
write to FPCR must be synchronized by a context synchronization
operation so we need to plant an ISB after the write.   Both the write
and ISB are likely to be expensive on some implementations so it would
be good to ensure that both the write and the isb are scheduled
independently.  IIRC there si

> I have limited real hardware access and just did a bootstrap and tested
> c11-atomic-exec-5.c alone to make sure that it PASS. I have also
> regression tested again on qemu-aarch64 for aarch64-none-linux-gnu with
> no new regressions. I will appreciate if you could do the regression
> testing on real hw.

Once the ISB issue is resolved I'll give the patch a spin on HW here.
/Marcus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-02 10:06     ` Marcus Shawcroft
@ 2014-05-02 10:15       ` Marcus Shawcroft
  2014-05-02 12:27       ` Kugan
  1 sibling, 0 replies; 12+ messages in thread
From: Marcus Shawcroft @ 2014-05-02 10:15 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches

On 2 May 2014 11:06, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote:

> Kugan, I've not looked at the respin in detail yet, but it has just
> occurred to me that the sequence used here to set FPCR is
> insufficient.  The architecture reference manual requires that any
> write to FPCR must be syncrhronized by a context synchronization
> operation so we need to plant an ISB after the write.   Both the write
> and ISB are likely to be expensive on some implementations so it would
> be good to ensure that both the write and the isb are scheduled
> independently.  IIRC there si

Sorry, incomplete sentence.  I had started to write that IIRC the same
issue did not apply to FPSCR in the ARM patch.  I have double-checked
and the FPSCR does not have the issue therefore the ARM patch is fine
in this respect.

/Marcus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-02  9:08     ` Kugan
@ 2014-05-02 11:58       ` Yufeng Zhang
  0 siblings, 0 replies; 12+ messages in thread
From: Yufeng Zhang @ 2014-05-02 11:58 UTC (permalink / raw)
  To: Kugan
  Cc: Ramana Radhakrishnan, gcc-patches, Marcus Shawcroft, Richard Earnshaw

On 05/02/14 10:08, Kugan wrote:
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 347a94a..8bd13f3 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9107,6 +9107,7 @@ to those machines.  Generally these generate calls to specific machine
>   instructions, but allow the compiler to schedule those calls.
>
>   @menu
> +* AARCH64 Built-in Functions::
>   * Alpha Built-in Functions::
>   * Altera Nios II Built-in Functions::
>   * ARC Built-in Functions::
> @@ -9139,6 +9140,18 @@ instructions, but allow the compiler to schedule those calls.
>   * TILEPro Built-in Functions::
>   @end menu
>
> +@node AARCH64 Built-in Functions
> +@subsection AARCH64 Built-in Functions
> +
> +These built-in functions are available for the AARCH64 family of
> +processors.
> +@smallexample
> +unsigned int __builtin_aarch64_get_fpcr ()
> +void __builtin_aarch64_set_fpcr (unsigned int)
> +unsigned int __builtin_aarch64_get_fpsr ()
> +void __builtin_aarch64_set_fpsr (unsigned int)
> +@end smallexample
> +
>   @node Alpha Built-in Functions
>   @subsection Alpha Built-in Functions
>

Please s/AARCH64/AArch64 to stay consistent with the existing usage, 
e.g. those in invoke.texi.

Thanks,
Yufeng

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-02 10:06     ` Marcus Shawcroft
  2014-05-02 10:15       ` Marcus Shawcroft
@ 2014-05-02 12:27       ` Kugan
  2014-05-11 23:45         ` Kugan
  2014-05-22 14:24         ` Marcus Shawcroft
  1 sibling, 2 replies; 12+ messages in thread
From: Kugan @ 2014-05-02 12:27 UTC (permalink / raw)
  To: Marcus Shawcroft; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3340 bytes --]



On 02/05/14 20:06, Marcus Shawcroft wrote:
> On 29 April 2014 03:37, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>
>> On 28/04/14 21:01, Ramana Radhakrishnan wrote:
>>> On 04/26/14 11:57, Kugan wrote:
>>>> Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
>>>> With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.
>>>>
>>>> This implementation is based on SPARC and i386 implementations.
>>>>
>>>> Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
>>>> regression. Is this OK for trunk?
>>>
>>> Again like A32 please test on hardware to make sure this behaves
>>> correctly with c11-atomic-exec-5.c .
>>>
>>> If you don't have access to hardware, let us know : we'll take it for a
>>> spin once you update the patch according to Marcus's comments.
>>>
>>
>> Thanks for the review. I have updated the patch. I also have updated
>> hold, clear and update to be exactly as in feholdexcpt.c, fclrexcpt.c
>> and feupdateenv.c of glibc/ports/sysdeps/aarch64/fpu.
>>
> 
> Kugan, I've not looked at the respin in detail yet, but it has just
> occurred to me that the sequence used here to set FPCR is
> insufficient.  The architecture reference manual requires that any
> write to FPCR must be syncrhronized by a context synchronization
> operation so we need to plant an ISB after the write.   Both the write
> and ISB are likely to be expensive on some implementations so it would
> be good to ensure that both the write and the isb are scheduled
> independently.  IIRC there si
> 
>> I have limited real hardware access and just did a bootstrap and tested
>> c11-atomic-exec-5.c alone to make sure that it PASS. I have also
>> regression tested again on qemu-aarch64 for aarch64-none-linux-gnu with
>> no new regressions. I will appreciate if you could do the regression
>> testing on real hw.
> 
> Once the ISB issue is resolved I'll give the patch a spin on HW here.

Here is the modified patch which also includes changes Yufeng has
suggested. Regression tested on qemu-aarch64 for aarch64-none-linux-gnu
with no new regressions.

Thanks,
Kugan


gcc/
+2014-05-02  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
+	define.
+	* config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
+	New function declaration.
+	* config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
+	AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR,
+	AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
+	(aarch64_init_builtins) : Initialize builtins
+	__builtin_aarch64_set_fpcr, __builtin_aarch64_get_fpcr,
+	__builtin_aarch64_set_fpsr and __builtin_aarch64_get_fpsr.
+	(aarch64_expand_builtin) : Expand builtins __builtin_aarch64_set_fpcr,
+	__builtin_aarch64_get_fpcr, __builtin_aarch64_get_fpsr
+	and __builtin_aarch64_set_fpsr.
+	(aarch64_atomic_assign_expand_fenv): New function.
+	* config/aarch64/aarch64.md (set_fpcr): New pattern.
+	(get_fpcr) : Likewise.
+	(set_fpsr) : Likewise.
+	(get_fpsr) : Likewise.
+	(unspecv): Add UNSPECV_GET_FPCR, UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
+	and UNSPECV_SET_FPSR.
+	* doc/extend.texi (AArch64 Built-in Functions) : Document
+	__builtin_aarch64_set_fpcr, __builtin_aarch64_get_fpcr,
+	__builtin_aarch64_set_fpsr and __builtin_aarch64_get_fpsr.
+

[-- Attachment #2: aarch64_v4.txt --]
[-- Type: text/plain, Size: 10527 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..a5af874 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
+
+  AARCH64_BUILTIN_GET_FPCR,
+  AARCH64_BUILTIN_SET_FPCR,
+  AARCH64_BUILTIN_GET_FPSR,
+  AARCH64_BUILTIN_SET_FPSR,
+
   AARCH64_SIMD_BUILTIN_BASE,
 #include "aarch64-simd-builtins.def"
   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
@@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void)
 void
 aarch64_init_builtins (void)
 {
+  tree ftype_set_fpr
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+  tree ftype_get_fpr
+    = build_function_type_list (unsigned_type_node, NULL);
+
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 }
@@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp,
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   int fcode = DECL_FUNCTION_CODE (fndecl);
+  int icode;
+  rtx pat, op0;
+  tree arg0;
+
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_GET_FPCR:
+    case AARCH64_BUILTIN_SET_FPCR:
+    case AARCH64_BUILTIN_GET_FPSR:
+    case AARCH64_BUILTIN_SET_FPSR:
+      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
+	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
+	{
+	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
+	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+    }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
     return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return changed;
 }
 
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned AARCH64_FE_INVALID = 1;
+  const unsigned AARCH64_FE_DIVBYZERO = 2;
+  const unsigned AARCH64_FE_OVERFLOW = 4;
+  const unsigned AARCH64_FE_UNDERFLOW = 8;
+  const unsigned AARCH64_FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
+							| AARCH64_FE_DIVBYZERO
+							| AARCH64_FE_OVERFLOW
+							| AARCH64_FE_UNDERFLOW
+							| AARCH64_FE_INEXACT);
+  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
+  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+  /* Generate the equivalent of :
+       unsigned int fenv_cr;
+       fenv_cr = __builtin_aarch64_get_fpcr ();
+
+       unsigned int fenv_sr;
+       fenv_sr = __builtin_aarch64_get_fpsr ();
+
+       Now set all exceptions to non-stop
+       unsigned int mask_cr
+		= ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
+       unsigned int masked_cr;
+       masked_cr = fenv_cr & mask_cr;
+
+       And clear all exception flags
+       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
+       unsigned int masked_sr;
+       masked_sr = fenv_sr & mask_sr;
+
+       __builtin_aarch64_set_fpcr (masked_cr);
+       __builtin_aarch64_set_fpsr (masked_sr);  */
+
+  fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+  fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+  mask_cr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
+  mask_sr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT));
+
+  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_cr, build_call_expr (get_fpcr, 0));
+  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_sr, build_call_expr (get_fpsr, 0));
+
+  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+			hold_fnclex_sr);
+  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+			masked_fenv_sr);
+  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */
+
+  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  /* Generate the equivalent of :
+       unsigned int new_fenv_var;
+       new_fenv_var = __builtin_aarch64_get_fpsr ();
+
+       __builtin_aarch64_set_fpsr (fenv_sr);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+			new_fenv_var, build_call_expr (get_fpsr, 0));
+  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5542f02..f4f3f61 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -289,4 +289,5 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
 extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 extern bool
 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3147ee..fbbdc23 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8488,6 +8488,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   aarch64_autovectorize_vector_sizes
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+  aarch64_atomic_assign_expand_fenv
+
 /* Section anchor support.  */
 
 #undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c86a29d..9f64f3d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -107,6 +107,10 @@
 
 (define_c_enum "unspecv" [
     UNSPECV_EH_RETURN		; Represent EH_RETURN
+    UNSPECV_GET_FPCR		; Represent fetch of FPCR content.
+    UNSPECV_SET_FPCR		; Represent assign of FPCR content.
+    UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
+    UNSPECV_SET_FPSR		; Represent assign of FPSR content.
   ]
 )
 
@@ -3635,6 +3639,37 @@
   DONE;
 })
 
+;; Write Floating-point Control Register.
+(define_insn "set_fpcr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+  ""
+  "msr\\tfpcr, %0\;isb"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Control Register.
+(define_insn "get_fpcr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
+  ""
+  "mrs\\t%0, fpcr"
+  [(set_attr "type" "mrs")])
+
+;; Write Floating-point Status Register.
+(define_insn "set_fpsr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+  ""
+  "msr\\tfpsr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "get_fpsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  ""
+  "mrs\\t%0, fpsr"
+  [(set_attr "type" "mrs")])
+
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 347a94a..7d27ce0 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9107,6 +9107,7 @@ to those machines.  Generally these generate calls to specific machine
 instructions, but allow the compiler to schedule those calls.
 
 @menu
+* AArch64 Built-in Functions::
 * Alpha Built-in Functions::
 * Altera Nios II Built-in Functions::
 * ARC Built-in Functions::
@@ -9139,6 +9140,18 @@ instructions, but allow the compiler to schedule those calls.
 * TILEPro Built-in Functions::
 @end menu
 
+@node AArch64 Built-in Functions
+@subsection AArch64 Built-in Functions
+
+These built-in functions are available for the AArch64 family of
+processors.
+@smallexample
+unsigned int __builtin_aarch64_get_fpcr ()
+void __builtin_aarch64_set_fpcr (unsigned int)
+unsigned int __builtin_aarch64_get_fpsr ()
+void __builtin_aarch64_set_fpsr (unsigned int)
+@end smallexample
+
 @node Alpha Built-in Functions
 @subsection Alpha Built-in Functions
 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-02 12:27       ` Kugan
@ 2014-05-11 23:45         ` Kugan
  2014-05-22 14:24         ` Marcus Shawcroft
  1 sibling, 0 replies; 12+ messages in thread
From: Kugan @ 2014-05-11 23:45 UTC (permalink / raw)
  To: Marcus Shawcroft; +Cc: gcc-patches

Ping ?

Thanks,
Kugan
On 02/05/14 22:27, Kugan wrote:
> 
> 
> On 02/05/14 20:06, Marcus Shawcroft wrote:
>> On 29 April 2014 03:37, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>>
>>> On 28/04/14 21:01, Ramana Radhakrishnan wrote:
>>>> On 04/26/14 11:57, Kugan wrote:
>>>>> Attached patch implements TARGET_ATOMIC_ASSIGN_EXPAND_FENV for AARCH64.
>>>>> With this, atomic test-case gcc.dg/atomic/c11-atomic-exec-5.c now PASS.
>>>>>
>>>>> This implementation is based on SPARC and i386 implementations.
>>>>>
>>>>> Regression tested on qemu-aarch64 for aarch64-none-linux-gnu with no new
>>>>> regression. Is this OK for trunk?
>>>>
>>>> Again like A32 please test on hardware to make sure this behaves
>>>> correctly with c11-atomic-exec-5.c .
>>>>
>>>> If you don't have access to hardware, let us know : we'll take it for a
>>>> spin once you update the patch according to Marcus's comments.
>>>>
>>>
>>> Thanks for the review. I have updated the patch. I also have updated
>>> hold, clear and update to be exactly as in feholdexcpt.c, fclrexcpt.c
>>> and feupdateenv.c of glibc/ports/sysdeps/aarch64/fpu.
>>>
>>
>> Kugan, I've not looked at the respin in detail yet, but it has just
>> occurred to me that the sequence used here to set FPCR is
>> insufficient.  The architecture reference manual requires that any
>> write to FPCR must be syncrhronized by a context synchronization
>> operation so we need to plant an ISB after the write.   Both the write
>> and ISB are likely to be expensive on some implementations so it would
>> be good to ensure that both the write and the isb are scheduled
>> independently.  IIRC there si
>>
>>> I have limited real hardware access and just did a bootstrap and tested
>>> c11-atomic-exec-5.c alone to make sure that it PASS. I have also
>>> regression tested again on qemu-aarch64 for aarch64-none-linux-gnu with
>>> no new regressions. I will appreciate if you could do the regression
>>> testing on real hw.
>>
>> Once the ISB issue is resolved I'll give the patch a spin on HW here.
> 
> Here is the modified patch which also includes changes Yufeng has
> suggested. Regression tested on qemu-aarch64 for aarch64-none-linux-gnu
> with no new regressions.
> 
> Thanks,
> Kugan
> 
> 
> gcc/
> +2014-05-02  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> +	define.
> +	* config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
> +	New function declaration.
> +	* config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
> +	AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
> +	AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
> +	(aarch64_init_builtins) : Initialize builtins
> +	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> +	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
> +	__builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
> +	and __builtins_aarch64_set_fpsr.
> +	(aarch64_atomic_assign_expand_fenv): New function.
> +	* config/aarch64/aarch64.md (set_fpcr): New pattern.
> +	(get_fpcr) : Likewise.
> +	(set_fpsr) : Likewise.
> +	(get_fpsr) : Likewise.
> +	(unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
> +	 and UNSPECV_SET_FPSR.
> +	* doc/extend.texi (AARCH64 Built-in Functions) : Document
> +	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> +
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-02 12:27       ` Kugan
  2014-05-11 23:45         ` Kugan
@ 2014-05-22 14:24         ` Marcus Shawcroft
  2015-06-17 16:35           ` James Greenhalgh
  1 sibling, 1 reply; 12+ messages in thread
From: Marcus Shawcroft @ 2014-05-22 14:24 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches

On 2 May 2014 13:27, Kugan <kugan.vivekanandarajah@linaro.org> wrote:

> +2014-05-02  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +       * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> +       define.
> +       * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
> +       New function declaration.
> +       * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
> +       AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
> +       AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
> +       (aarch64_init_builtins) : Initialize builtins
> +       __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +       __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> +       (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
> +       __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
> +       and __builtins_aarch64_set_fpsr.
> +       (aarch64_atomic_assign_expand_fenv): New function.
> +       * config/aarch64/aarch64.md (set_fpcr): New pattern.
> +       (get_fpcr) : Likewise.
> +       (set_fpsr) : Likewise.
> +       (get_fpsr) : Likewise.
> +       (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
> +        and UNSPECV_SET_FPSR.
> +       * doc/extend.texi (AARCH64 Built-in Functions) : Document
> +       __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> +       __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> +

OK, thanks Kugan.

/Marcus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook
  2014-05-22 14:24         ` Marcus Shawcroft
@ 2015-06-17 16:35           ` James Greenhalgh
  0 siblings, 0 replies; 12+ messages in thread
From: James Greenhalgh @ 2015-06-17 16:35 UTC (permalink / raw)
  To: Marcus Shawcroft; +Cc: Kugan, gcc-patches

On Thu, May 22, 2014 at 03:24:23PM +0100, Marcus Shawcroft wrote:
> On 2 May 2014 13:27, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
> 
> > +2014-05-02  Kugan Vivekanandarajah  <kuganv@linaro.org>
> > +
> > +       * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
> > +       define.
> > +       * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
> > +       New function declaration.
> > +       * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
> > +       AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
> > +       AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
> > +       (aarch64_init_builtins) : Initialize builtins
> > +       __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> > +       __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> > +       (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
> > +       __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
> > +       and __builtins_aarch64_set_fpsr.
> > +       (aarch64_atomic_assign_expand_fenv): New function.
> > +       * config/aarch64/aarch64.md (set_fpcr): New pattern.
> > +       (get_fpcr) : Likewise.
> > +       (set_fpsr) : Likewise.
> > +       (get_fpsr) : Likewise.
> > +       (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
> > +        and UNSPECV_SET_FPSR.
> > +       * doc/extend.texi (AARCH64 Built-in Functions) : Document
> > +       __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
> > +       __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
> > +
> 
> OK, thanks Kugan.
> 

I appreciate it is quite late in the day for the 4.9.3 branch, but do
we want to consider this patch for backporting (either now or after
the branch reopens)?

  gcc.dg/atomic/c11-atomic-exec-5.c

is the only interesting test I see failing on a native AArch64 build of
the 4.9.3 release candidate (there are plenty of other FAILures, but they
are guality, scan assembler or missed optimization fails).

Thanks,
James

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-06-17 16:10 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-26 12:00 [RFC][AARCH64] TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook Kugan
2014-04-28 10:03 ` Marcus Shawcroft
2014-04-28 11:06 ` Ramana Radhakrishnan
2014-04-29  3:03   ` Kugan
2014-05-02  9:08     ` Kugan
2014-05-02 11:58       ` Yufeng Zhang
2014-05-02 10:06     ` Marcus Shawcroft
2014-05-02 10:15       ` Marcus Shawcroft
2014-05-02 12:27       ` Kugan
2014-05-11 23:45         ` Kugan
2014-05-22 14:24         ` Marcus Shawcroft
2015-06-17 16:35           ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).