public inbox for gcc-patches@gcc.gnu.org
* [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
@ 2023-09-01 12:55 Tamar Christina
  2023-09-01 13:35 ` Richard Sandiford
  2023-09-19 12:31 ` Tamar Christina
  0 siblings, 2 replies; 5+ messages in thread
From: Tamar Christina @ 2023-09-01 12:55 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 5108 bytes --]

Hi All,

In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
testcase was not strong enough.  With this commit

8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
commit 8d2d39587d941a40f25ea0144cceb677df115040
Author: Segher Boessenkool <segher@kernel.crashing.org>
Date:   Mon Oct 22 22:23:39 2018 +0200

    combine: Do not combine moves from hard registers

combine started introducing useless moves on hard registers.  When one of the
arguments to our scalar xorsign is a hard register we get an additional move inserted.

This leads to combine forming an AND with the immediate inside and using the
superfluous move to do the r->w move, instead of what we wanted before, which was
for the `and` to be a vector `and` and have reload pick the right alternative.

To fix this the patch just forces the use of the vector version directly and
so combine has no chance to mess it up.
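
As a concrete illustration (the function below is my own minimal example, not
from the patch; the in-tree test gcc.target/aarch64/xorsign.c exercises the
same copysign-times-x shape):

  double
  xorsign_d (double x, double y)
  {
    /* Recognized as xorsign: the sign bit of y is XORed into x.  */
    return x * __builtin_copysign (1.0, y);
  }

With the fix this should again expand to a vector `and` of the sign-bit mask
with y followed by a vector `eor` into x (the and/eor v-register forms the
updated testcase scans for), without the superfluous fmov between general and
FP registers.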

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to...
	(@xorsign<mode>3): ...This.
	* config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
	(@xorsign<mode>3): ...This and emit vectors directly.
	* config/aarch64/iterators.md (VCONQ): Add SF and DF.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/xorsign.c: Adjust scan-assembler directives.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
   }
 )
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
    (match_operand:VHSDF 2 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
 ;; EOR   v0.8B, v0.8B, v3.8B
 ;;
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
    (match_operand:GPF 2 "register_operand")]
   "TARGET_SIMD"
 {
-
-  machine_mode imode = <V_INT_EQUIV>mode;
-  rtx mask = gen_reg_rtx (imode);
-  rtx op1x = gen_reg_rtx (imode);
-  rtx op2x = gen_reg_rtx (imode);
-
-  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
-  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
-						     imode)));
-
-  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
-				    lowpart_subreg (imode, operands[2],
-						    <MODE>mode)));
-  emit_insn (gen_xor<v_int_equiv>3 (op1x,
-				    lowpart_subreg (imode, operands[1],
-						    <MODE>mode),
-				    op2x));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = gen_reg_rtx (<VCONQ>mode);
+  rtx op2 = gen_reg_rtx (<VCONQ>mode);
+  emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
+  emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode));
+  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2));
   emit_move_insn (operands[0],
-		  lowpart_subreg (<MODE>mode, op1x, imode));
+		  lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
   DONE;
 }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (V4HF "V8HF") (V8HF "V8HF")
 			 (V2SF "V4SF") (V4SF "V4SF")
 			 (V2DF "V2DF") (SI   "V4SI")
-			 (HI   "V8HI") (QI   "V16QI")])
+			 (HI   "V8HI") (QI   "V16QI")
+			 (SF   "V4SF") (DF   "V2DF")])
 
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c
index 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753 100644
--- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
   return __builtin_copysignl (-1.0, y) * x;
 }
 
-/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
-/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
+/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
 /* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "fmov" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */




-- 


* Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
  2023-09-01 12:55 [PATCH]AArch64 xorsign: Fix scalar xorsign lowering Tamar Christina
@ 2023-09-01 13:35 ` Richard Sandiford
  2023-09-01 14:04   ` Tamar Christina
  2023-09-19 12:31 ` Tamar Christina
  1 sibling, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2023-09-01 13:35 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov

Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
> testcase was not strong enough.  With this commit
>
> 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
> commit 8d2d39587d941a40f25ea0144cceb677df115040
> Author: Segher Boessenkool <segher@kernel.crashing.org>
> Date:   Mon Oct 22 22:23:39 2018 +0200
>
>     combine: Do not combine moves from hard registers
>
> combine started introducing useless moves on hard registers,  when one of the
> arguments to our scalar xorsign is a hardreg we get an additional move inserted.
>
> This leads to combine forming an AND with the immediate inside and using the
> superfluous move to do the r->w move, instead of what we wanted before which was
> for the `and` to be a vector and and have reload pick the right alternative.

IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap
measure that (like most stop-gap measures) never got cleaned up later.

But that's not important now. :)

> To fix this the patch just forces the use of the vector version directly and
> so combine has no chance to mess it up.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to..
> 	(@xorsign<mode>3): ...This.
> 	* config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
> 	(@xorsign<mode>3): ..This and emit vectors directly
> 	* config/aarch64/iterators.md (VCONQ): Add SF and DF.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/xorsign.c:
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
>    }
>  )
>  
> -(define_expand "xorsign<mode>3"
> +(define_expand "@xorsign<mode>3"
>    [(match_operand:VHSDF 0 "register_operand")
>     (match_operand:VHSDF 1 "register_operand")
>     (match_operand:VHSDF 2 "register_operand")]
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
>  ;; EOR   v0.8B, v0.8B, v3.8B
>  ;;
>  
> -(define_expand "xorsign<mode>3"
> +(define_expand "@xorsign<mode>3"
>    [(match_operand:GPF 0 "register_operand")
>     (match_operand:GPF 1 "register_operand")
>     (match_operand:GPF 2 "register_operand")]
>    "TARGET_SIMD"
>  {
> -
> -  machine_mode imode = <V_INT_EQUIV>mode;
> -  rtx mask = gen_reg_rtx (imode);
> -  rtx op1x = gen_reg_rtx (imode);
> -  rtx op2x = gen_reg_rtx (imode);
> -
> -  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
> -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
> -						     imode)));
> -
> -  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
> -				    lowpart_subreg (imode, operands[2],
> -						    <MODE>mode)));
> -  emit_insn (gen_xor<v_int_equiv>3 (op1x,
> -				    lowpart_subreg (imode, operands[1],
> -						    <MODE>mode),
> -				    op2x));
> +  rtx tmp = gen_reg_rtx (<VCONQ>mode);
> +  rtx op1 = gen_reg_rtx (<VCONQ>mode);
> +  rtx op2 = gen_reg_rtx (<VCONQ>mode);
> +  emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
> +  emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode));
> +  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2));

Do we need the extra moves into op1 and op2?  I would have expected the
subregs to be acceptable as direct operands of the xorsign3.  Making
them direct operands should be better, since there's then less risk of
having the same value live in different registers at the same time.

OK with that change if it works.

Also, nit: missing space before "(".

Thanks,
Richard

>    emit_move_insn (operands[0],
> -		  lowpart_subreg (<MODE>mode, op1x, imode));
> +		  lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
>    DONE;
>  }
>  )
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
>  			 (V4HF "V8HF") (V8HF "V8HF")
>  			 (V2SF "V4SF") (V4SF "V4SF")
>  			 (V2DF "V2DF") (SI   "V4SI")
> -			 (HI   "V8HI") (QI   "V16QI")])
> +			 (HI   "V8HI") (QI   "V16QI")
> +			 (SF   "V4SF") (DF   "V2DF")])
>  
>  ;; Half modes of all vector modes.
>  (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
> diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c
> index 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753 100644
> --- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
> +++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
> @@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
>    return __builtin_copysignl (-1.0, y) * x;
>  }
>  
> -/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
> -/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
> +/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
> +/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
>  /* { dg-final { scan-assembler-not "copysign" } } */
> +/* { dg-final { scan-assembler-not "fmov" } } */
>  /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
>  /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */


* RE: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
  2023-09-01 13:35 ` Richard Sandiford
@ 2023-09-01 14:04   ` Tamar Christina
  2023-09-01 15:13     ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Tamar Christina @ 2023-09-01 14:04 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov



> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Friday, September 1, 2023 2:36 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
> 
> Tamar Christina <tamar.christina@arm.com> writes:
> > Hi All,
> >
> > In GCC-9 our scalar xorsign pattern broke and we didn't notice it
> > because the testcase was not strong enough.  With this commit
> >
> > 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
> > commit 8d2d39587d941a40f25ea0144cceb677df115040
> > Author: Segher Boessenkool <segher@kernel.crashing.org>
> > Date:   Mon Oct 22 22:23:39 2018 +0200
> >
> >     combine: Do not combine moves from hard registers
> >
> > combine started introducing useless moves on hard registers,  when one
> > of the arguments to our scalar xorsign is a hardreg we get an additional move
> inserted.
> >
> > This leads to combine forming an AND with the immediate inside and
> > using the superfluous move to do the r->w move, instead of what we
> > wanted before which was for the `and` to be a vector and and have reload
> pick the right alternative.
> 
> IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap measure
> that (like most stop-gap measures) never got cleaned up later.
> 
> But that's not important now. :)
> 
> > To fix this the patch just forces the use of the vector version
> > directly and so combine has no chance to mess it up.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to..
> > 	(@xorsign<mode>3): ...This.
> > 	* config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
> > 	(@xorsign<mode>3): ..This and emit vectors directly
> > 	* config/aarch64/iterators.md (VCONQ): Add SF and DF.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	* gcc.target/aarch64/xorsign.c:
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index
> >
> f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc2
> 3746511
> > 9764ce2a4942 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
> >    }
> >  )
> >
> > -(define_expand "xorsign<mode>3"
> > +(define_expand "@xorsign<mode>3"
> >    [(match_operand:VHSDF 0 "register_operand")
> >     (match_operand:VHSDF 1 "register_operand")
> >     (match_operand:VHSDF 2 "register_operand")] diff --git
> > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
> >
> 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aede
> cdc84
> > 62c94bf1a769 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
> >  ;; EOR   v0.8B, v0.8B, v3.8B
> >  ;;
> >
> > -(define_expand "xorsign<mode>3"
> > +(define_expand "@xorsign<mode>3"
> >    [(match_operand:GPF 0 "register_operand")
> >     (match_operand:GPF 1 "register_operand")
> >     (match_operand:GPF 2 "register_operand")]
> >    "TARGET_SIMD"
> >  {
> > -
> > -  machine_mode imode = <V_INT_EQUIV>mode;
> > -  rtx mask = gen_reg_rtx (imode);
> > -  rtx op1x = gen_reg_rtx (imode);
> > -  rtx op2x = gen_reg_rtx (imode);
> > -
> > -  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
> > -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode
> (HOST_WIDE_INT_M1U << bits,
> > -						     imode)));
> > -
> > -  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
> > -				    lowpart_subreg (imode, operands[2],
> > -						    <MODE>mode)));
> > -  emit_insn (gen_xor<v_int_equiv>3 (op1x,
> > -				    lowpart_subreg (imode, operands[1],
> > -						    <MODE>mode),
> > -				    op2x));
> > +  rtx tmp = gen_reg_rtx (<VCONQ>mode);  rtx op1 = gen_reg_rtx
> > + (<VCONQ>mode);  rtx op2 = gen_reg_rtx (<VCONQ>mode);
> emit_move_insn
> > + (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
> > + emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2],
> > + <MODE>mode));  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1,
> op2));
> 
> Do we need the extra moves into op1 and op2?  I would have expected the
> subregs to be acceptable as direct operands of the xorsign3.  Making them
> direct operands should be better, since there's then less risk of having the
> same value live in different registers at the same time.
> 

That was the first thing I tried, but it doesn't work because validate_subreg seems
to have the invariant that you can either change the mode between same-sized modes
or make the subreg paradoxical, but not both at the same time.

I.e. it rejects (subreg:V2DI (subreg:DI (reg:DF))), and lowpart_subreg folds it to
NULL_RTX, because the lowering when the input is a subreg takes the mode of the
original RTX; i.e. the above is folded to (subreg:V2DI (reg:DF)), which is not valid.

I tried creating it directly through gen_rtx_SUBREG, but that hits an assert in
RTL validation later with the same restriction.

So the moves are there to maintain the invariant.

Cheers,
Tamar

> OK with that change if it works.
> 
> Also, nit: missing space before "(".
> 
> Thanks,
> Richard
> 
> >    emit_move_insn (operands[0],
> > -		  lowpart_subreg (<MODE>mode, op1x, imode));
> > +		  lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
> >    DONE;
> >  }
> >  )
> > diff --git a/gcc/config/aarch64/iterators.md
> > b/gcc/config/aarch64/iterators.md index
> >
> 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac83
> 39eed9bc
> > 975cf203fa4c 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI")
> (V16QI "V16QI")
> >  			 (V4HF "V8HF") (V8HF "V8HF")
> >  			 (V2SF "V4SF") (V4SF "V4SF")
> >  			 (V2DF "V2DF") (SI   "V4SI")
> > -			 (HI   "V8HI") (QI   "V16QI")])
> > +			 (HI   "V8HI") (QI   "V16QI")
> > +			 (SF   "V4SF") (DF   "V2DF")])
> >
> >  ;; Half modes of all vector modes.
> >  (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI") diff --git
> > a/gcc/testsuite/gcc.target/aarch64/xorsign.c
> > b/gcc/testsuite/gcc.target/aarch64/xorsign.c
> > index
> >
> 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb7
> 9cb06e12
> > c72ad46eb753 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
> > @@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
> >    return __builtin_copysignl (-1.0, y) * x;  }
> >
> > -/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
> > -/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
> > +/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b,
> > +v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
> > +/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b,
> > +v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
> >  /* { dg-final { scan-assembler-not "copysign" } } */
> > +/* { dg-final { scan-assembler-not "fmov" } } */
> >  /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
> >  /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */


* Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
  2023-09-01 14:04   ` Tamar Christina
@ 2023-09-01 15:13     ` Richard Sandiford
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Sandiford @ 2023-09-01 15:13 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Friday, September 1, 2023 2:36 PM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
>> Subject: Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
>> 
>> Tamar Christina <tamar.christina@arm.com> writes:
>> > Hi All,
>> >
>> > In GCC-9 our scalar xorsign pattern broke and we didn't notice it
>> > because the testcase was not strong enough.  With this commit
>> >
>> > 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
>> > commit 8d2d39587d941a40f25ea0144cceb677df115040
>> > Author: Segher Boessenkool <segher@kernel.crashing.org>
>> > Date:   Mon Oct 22 22:23:39 2018 +0200
>> >
>> >     combine: Do not combine moves from hard registers
>> >
>> > combine started introducing useless moves on hard registers,  when one
>> > of the arguments to our scalar xorsign is a hardreg we get an additional move
>> inserted.
>> >
>> > This leads to combine forming an AND with the immediate inside and
>> > using the superfluous move to do the r->w move, instead of what we
>> > wanted before which was for the `and` to be a vector and and have reload
>> pick the right alternative.
>> 
>> IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap measure
>> that (like most stop-gap measures) never got cleaned up later.
>> 
>> But that's not important now. :)
>> 
>> > To fix this the patch just forces the use of the vector version
>> > directly and so combine has no chance to mess it up.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> > gcc/ChangeLog:
>> >
>> > 	* config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to..
>> > 	(@xorsign<mode>3): ...This.
>> > 	* config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
>> > 	(@xorsign<mode>3): ..This and emit vectors directly
>> > 	* config/aarch64/iterators.md (VCONQ): Add SF and DF.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 	* gcc.target/aarch64/xorsign.c:
>> >
>> > --- inline copy of patch --
>> > diff --git a/gcc/config/aarch64/aarch64-simd.md
>> > b/gcc/config/aarch64/aarch64-simd.md
>> > index
>> >
>> f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc2
>> 3746511
>> > 9764ce2a4942 100644
>> > --- a/gcc/config/aarch64/aarch64-simd.md
>> > +++ b/gcc/config/aarch64/aarch64-simd.md
>> > @@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
>> >    }
>> >  )
>> >
>> > -(define_expand "xorsign<mode>3"
>> > +(define_expand "@xorsign<mode>3"
>> >    [(match_operand:VHSDF 0 "register_operand")
>> >     (match_operand:VHSDF 1 "register_operand")
>> >     (match_operand:VHSDF 2 "register_operand")] diff --git
>> > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
>> >
>> 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aede
>> cdc84
>> > 62c94bf1a769 100644
>> > --- a/gcc/config/aarch64/aarch64.md
>> > +++ b/gcc/config/aarch64/aarch64.md
>> > @@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
>> >  ;; EOR   v0.8B, v0.8B, v3.8B
>> >  ;;
>> >
>> > -(define_expand "xorsign<mode>3"
>> > +(define_expand "@xorsign<mode>3"
>> >    [(match_operand:GPF 0 "register_operand")
>> >     (match_operand:GPF 1 "register_operand")
>> >     (match_operand:GPF 2 "register_operand")]
>> >    "TARGET_SIMD"
>> >  {
>> > -
>> > -  machine_mode imode = <V_INT_EQUIV>mode;
>> > -  rtx mask = gen_reg_rtx (imode);
>> > -  rtx op1x = gen_reg_rtx (imode);
>> > -  rtx op2x = gen_reg_rtx (imode);
>> > -
>> > -  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
>> > -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode
>> (HOST_WIDE_INT_M1U << bits,
>> > -						     imode)));
>> > -
>> > -  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
>> > -				    lowpart_subreg (imode, operands[2],
>> > -						    <MODE>mode)));
>> > -  emit_insn (gen_xor<v_int_equiv>3 (op1x,
>> > -				    lowpart_subreg (imode, operands[1],
>> > -						    <MODE>mode),
>> > -				    op2x));
>> > +  rtx tmp = gen_reg_rtx (<VCONQ>mode);  rtx op1 = gen_reg_rtx
>> > + (<VCONQ>mode);  rtx op2 = gen_reg_rtx (<VCONQ>mode);
>> emit_move_insn
>> > + (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
>> > + emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2],
>> > + <MODE>mode));  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1,
>> op2));
>> 
>> Do we need the extra moves into op1 and op2?  I would have expected the
>> subregs to be acceptable as direct operands of the xorsign3.  Making them
>> direct operands should be better, since there's then less risk of having the
>> same value live in different registers at the same time.
>> 
>
> That was the first thing I tried but it doesn't work because validate_subreg seems
> to have the invariant that you can either change mode between the same size
> or make it paradoxical but not both at the same time.
>
> i.e. it rejects subreg:V2DI (subreg:DI (reg:DF))), and lowpart_subreg folds it to
> NULL_RTX. Because the lowering when the input is a subreg takes the mode of
> the original RTX. i.e. the above is folder to subreg:V2DI (reg:DF) which is not valid.

Gah, I'd forgotten about that.

I think we should relax that rule.  validate_subreg was originally added
to prevent some pretty egregious uses of subregs, but as the code shows,
it was never possible to lock things down as much as hoped.  And I'm not
sure it makes as much sense to try these days.  I think we have a pretty
settled idea of what subregs mean in a target-independent sense, and we
now have better ways than we did before for targets to say which subregs
they support natively for a given register class.

(subreg:V2DI (reg:DF X)) and (subreg:V2DF (reg:DI X)) are IMO perfectly
well-defined in a target-independent sense, so I think we should relax:

  else if (VECTOR_MODE_P (omode)
	   && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
    ;

to:

  else if (VECTOR_MODE_P (omode)
	   && GET_MODE_UNIT_SIZE (omode) == GET_MODE_UNIT_SIZE (imode))
    ;
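
(Concretely, for the two subregs above: V2DI's unit mode DI and the scalar DF
are both 8 bytes, and likewise V2DF's unit mode DF and the scalar DI, so both
pass the relaxed unit-size check even though the inner modes differ.)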

Preapproved if it works & passes bootstrap on aarch64-linux-gnu,
powerpc64le-linux-gnu and x86_64-linux-gnu, unless someone objects
beforehand.

Thanks,
Richard


* [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
  2023-09-01 12:55 [PATCH]AArch64 xorsign: Fix scalar xorsign lowering Tamar Christina
  2023-09-01 13:35 ` Richard Sandiford
@ 2023-09-19 12:31 ` Tamar Christina
  1 sibling, 0 replies; 5+ messages in thread
From: Tamar Christina @ 2023-09-19 12:31 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 5145 bytes --]

Hi All,

In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
testcase was not strong enough.  With this commit

8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
commit 8d2d39587d941a40f25ea0144cceb677df115040
Author: Segher Boessenkool <segher@kernel.crashing.org>
Date:   Mon Oct 22 22:23:39 2018 +0200

    combine: Do not combine moves from hard registers

combine started introducing useless moves on hard registers.  When one of the
arguments to our scalar xorsign is a hard register we get an additional move inserted.

This leads to combine forming an AND with the immediate inside and using the
superfluous move to do the r->w move, instead of what we wanted before, which was
for the `and` to be a vector `and` and have reload pick the right alternative.

To fix this the patch just forces the use of the vector version directly and
so combine has no chance to mess it up.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

The patch was pre-approved [1], so I'm posting it on the list for archival.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629119.html
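
(Compared to the first version, the intermediate pseudos and emit_move_insn
copies of the inputs are gone; the lowpart subregs of operands[1] and
operands[2] are now passed directly to the vector expander, as suggested in
the review.)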

Pushing to master.

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to...
	(@xorsign<mode>3): ...This.
	* config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
	(@xorsign<mode>3): ...This and emit vectors directly.
	* config/aarch64/iterators.md (VCONQ): Add SF and DF.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/xorsign.c: Adjust scan-assembler directives.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
   }
 )
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
    (match_operand:VHSDF 2 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 01cf989641fce8e6c3828f6cfef62e101c4142df..8071422b762955b81546d4325aa277842384da3a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6953,31 +6953,18 @@ (define_insn "copysign<GPF:mode>3_insn"
 ;; EOR   v0.8B, v0.8B, v3.8B
 ;;
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
    (match_operand:GPF 2 "register_operand")]
   "TARGET_SIMD"
 {
-
-  machine_mode imode = <V_INT_EQUIV>mode;
-  rtx mask = gen_reg_rtx (imode);
-  rtx op1x = gen_reg_rtx (imode);
-  rtx op2x = gen_reg_rtx (imode);
-
-  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
-  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
-						     imode)));
-
-  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
-				    lowpart_subreg (imode, operands[2],
-						    <MODE>mode)));
-  emit_insn (gen_xor<v_int_equiv>3 (op1x,
-				    lowpart_subreg (imode, operands[1],
-						    <MODE>mode),
-				    op2x));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode);
+  rtx op2 = lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode);
+  emit_insn (gen_xorsign3 (<VCONQ>mode, tmp, op1, op2));
   emit_move_insn (operands[0],
-		  lowpart_subreg (<MODE>mode, op1x, imode));
+		  lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
   DONE;
 }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (V4HF "V8HF") (V8HF "V8HF")
 			 (V2SF "V4SF") (V4SF "V4SF")
 			 (V2DF "V2DF") (SI   "V4SI")
-			 (HI   "V8HI") (QI   "V16QI")])
+			 (HI   "V8HI") (QI   "V16QI")
+			 (SF   "V4SF") (DF   "V2DF")])
 
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c
index 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753 100644
--- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
   return __builtin_copysignl (-1.0, y) * x;
 }
 
-/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
-/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
+/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */
 /* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "fmov" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */




-- 


end of thread, other threads:[~2023-09-19 12:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
2023-09-01 12:55 [PATCH]AArch64 xorsign: Fix scalar xorsign lowering Tamar Christina
2023-09-01 13:35 ` Richard Sandiford
2023-09-01 14:04   ` Tamar Christina
2023-09-01 15:13     ` Richard Sandiford
2023-09-19 12:31 ` Tamar Christina
