public inbox for gcc-patches@gcc.gnu.org
* [PATCH]AArch64: remove reliance on register allocator for simd/gpreg costing. [PR114741]
@ 2024-04-18 10:11 Tamar Christina
  2024-04-18 10:20 ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 2+ messages in thread
From: Tamar Christina @ 2024-04-18 10:11 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 7296 bytes --]

Hi All,

In PR114741 we see a regression in codegen when SVE is enabled, where
the simple testcase:

void foo(unsigned v, unsigned *p)
{
    *p = v & 1;
}

generates

foo:
        fmov    s31, w0
        and     z31.s, z31.s, #1
        str     s31, [x1]
        ret

instead of:

foo:
        and     w0, w0, 1
        str     w0, [x1]
        ret

This impacts not just codesize but also performance.  It is caused
by the use of the ^ constraint modifier in the pattern <optab><mode>3.

The documentation states that this modifier should only have an effect on the
alternative costing, in that a particular alternative is to be preferred unless
a non-pseudo reload is needed.

The pattern was trying to convey that, whenever both r and w are possible,
r should be preferred unless a reload is needed.  This is because if a reload
is needed then we can construct the constants more flexibly on the SIMD side.

We were using this to simplify the implementation and to get generic cases such
as:

double negabs (double x)
{
   unsigned long long y;
   memcpy (&y, &x, sizeof(double));
   y = y | (1UL << 63);
   memcpy (&x, &y, sizeof(double));
   return x;
}

which don't go through an expander.
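
Another generic case of the same shape (my own illustrative example, not taken
from the PR) is a hand-written fabs that clears the sign bit through an integer
view of the double; like negabs above it is just a scalar AND at the RTL level
and reaches the logical pattern directly rather than via an expander (as with
negabs, <string.h> provides memcpy):

double myabs (double x)
{
   unsigned long long y;
   memcpy (&y, &x, sizeof(double));
   y = y & ~(1UL << 63);   /* clear bit 63, the sign bit of the double.  */
   memcpy (&x, &y, sizeof(double));
   return x;
}
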
However, the implementation of ^ in the register allocator does not follow the
documentation: it also has an effect during coloring.  During initial
register class selection it applies a penalty to a class, similar to how ? does.

In this example the penalty makes the use of GP regs expensive enough that the
allocator no longer considers them:

    r106: preferred FP_REGS, alternative NO_REGS, allocno FP_REGS
;;        3--> b  0: i   9 r106=r105&0x1
    :cortex_a53_slot_any:GENERAL_REGS+0(-1)FP_REGS+1(1)PR_LO_REGS+0(0)
                         PR_HI_REGS+0(0):model 4

which is not the expected behavior.  For GCC 14 this patch is a conservative fix:

1. We remove the ^ modifier from the logical optabs.

2. In order not to regress copysign, we move the copysign expansion to use the
   SIMD variant directly.  Since copysign only supports floating-point modes
   this is fine, and it no longer relies on the register allocator to select
   the right alternative (a short sketch of the two expansion paths follows
   below).
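
As a short illustration of those two paths (my own sketch, not part of the
patch itself), assuming __builtin_copysign:

/* Negative constant second operand: expanded directly as a vector ORR of
   the sign bit (the gen_iorv2<v_int_equiv>3 path).  */
double force_neg (double x)
{
  return __builtin_copysign (x, -1.0);
}

/* General case: operand 2 is forced into a register and matched by the
   copysign<mode>3_insn pattern.  */
double copy_sign (double x, double y)
{
  return __builtin_copysign (x, y);
}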

It once again regresses the general case, but this case wasn't optimized in
earlier GCCs either, so it's not a regression in GCC 14.  This change gives
strictly better codegen than earlier GCCs and still optimizes the important cases.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR target/114741
	* config/aarch64/aarch64.md (<optab><mode>3): Remove ^ from alt 2.
	(copysign<GPF:mode>3): Use SIMD version of IOR directly.

gcc/testsuite/ChangeLog:

	PR target/114741
	* gcc.target/aarch64/fneg-abs_2.c: Update codegen.
	* gcc.target/aarch64/fneg-abs_4.c: xfail for now.
	* gcc.target/aarch64/pr114741.c: New test.

---
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 385a669b9b3c31cc9108a660e881b9091c71fc7c..dbde066f7478bec51a8703b017ea553aa98be309 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4811,7 +4811,7 @@ (define_insn "<optab><mode>3"
   ""
   {@ [ cons: =0 , 1  , 2        ; attrs: type , arch  ]
      [ r        , %r , r        ; logic_reg   , *     ] <logical>\t%<w>0, %<w>1, %<w>2
-     [ rk       , ^r , <lconst> ; logic_imm   , *     ] <logical>\t%<w>0, %<w>1, %2
+     [ rk       , r  , <lconst> ; logic_imm   , *     ] <logical>\t%<w>0, %<w>1, %2
      [ w        , 0  , <lconst> ; *           , sve   ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
      [ w        , w  , w        ; neon_logic  , simd  ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   }
@@ -7192,22 +7192,29 @@ (define_expand "copysign<GPF:mode>3"
    (match_operand:GPF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
-				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
+			       << (GET_MODE_BITSIZE (<MODE>mode) - 1));
   /* copysign (x, -1) should instead be expanded as orr with the sign
      bit.  */
   rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
   if (GET_CODE (op2_elt) == CONST_DOUBLE
       && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
     {
-      emit_insn (gen_ior<v_int_equiv>3 (
-	lowpart_subreg (int_mode, operands[0], <MODE>mode),
-	lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
+      rtx v_bitmask
+	= force_reg (V2<V_INT_EQUIV>mode,
+		     gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
+					      signbit_const));
+
+      emit_insn (gen_iorv2<v_int_equiv>3 (
+	lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
+	lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+	v_bitmask));
       DONE;
     }
 
+  machine_mode int_mode = <V_INT_EQUIV>mode;
+  rtx bitmask = gen_reg_rtx (int_mode);
+  emit_move_insn (bitmask, signbit_const);
   operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
 				       bitmask));
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index eed41ea18e69ff60ac79cbdb7c0c935850b49b33..18d10ee834d5d9b4361d890447060e78f09d3a73 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,8 +9,7 @@
 
 /*
 ** f1:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	v[0-9]+.2s, #?128, lsl #?24
 **	ret
 */
 float32_t f1 (float32_t a)
@@ -22,7 +21,7 @@ float32_t f1 (float32_t a)
 ** f2:
 **	movi	v[0-9]+.4s, #?0
 **	fneg	v[0-9]+.2d, v[0-9]+.2d
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
 **	ret
 */
 float64_t f2 (float64_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
index d45c3d1210c682f38177a89f1137ab4f6decfbc1..6da95a4b052ca3d9e0b8db0e465c5f40139b2b18 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -7,7 +7,7 @@
 #include <string.h>
 
 /*
-** negabs:
+** negabs: { xfail *-*-* }
 **	movi	v31.4s, #?0
 **	fneg	v[0-9]+.2d, v[0-9]+.2d
 **	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
@@ -23,7 +23,7 @@ double negabs (double x)
 }
 
 /*
-** negabsf:
+** negabsf: { xfail *-*-* }
 **	movi	v[0-9]+.2s, 0x80, lsl 24
 **	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
 **	ret
diff --git a/gcc/testsuite/gcc.target/aarch64/pr114741.c b/gcc/testsuite/gcc.target/aarch64/pr114741.c
new file mode 100644
index 0000000000000000000000000000000000000000..fd2182da0e966afcf0ab5bdef2f8d3892c2a3d67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr114741.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+/*
+** foo1:
+**	and	w0, w0, 1
+**	str	w0, \[x1\]
+**	ret
+*/
+void foo1(unsigned v, unsigned *p)
+{
+    *p = v & 1;
+}
+
+#pragma GCC target "+sve"
+
+/*
+** foo2:
+**	and	w0, w0, 1
+**	str	w0, \[x1\]
+**	ret
+*/
+void foo2(unsigned v, unsigned *p)
+{
+    *p = v & 1;
+}




-- 


* Re: [PATCH]AArch64: remove reliance on register allocator for simd/gpreg costing. [PR114741]
  2024-04-18 10:11 [PATCH]AArch64: remove reliance on register allocator for simd/gpreg costing. [PR114741] Tamar Christina
@ 2024-04-18 10:20 ` Richard Earnshaw (lists)
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Earnshaw (lists) @ 2024-04-18 10:20 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches
  Cc: nd, Marcus.Shawcroft, Kyrylo.Tkachov, richard.sandiford

On 18/04/2024 11:11, Tamar Christina wrote:

OK.

R.
