[PATCH] aarch64: Add fcsel to cmov integer and csel to float cmov [PR98477]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] aarch64: Add fcsel to cmov integer and csel to float cmov [PR98477]
@ 2024-05-06 18:27 Andrew Pinski
  2024-06-10 11:26 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: Andrew Pinski @ 2024-05-06 18:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

This patch adds an alternative to the integer cmov and one to floating
point cmov so we avoid in some more moving

	PR target/98477

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*cmov<mode>_insn[GPI]): Add 'w'
	alternative.
	(*cmov<mode>_insn[GPF]): Add 'r' alternative.
	* config/aarch64/iterators.md (wv): New mode attr.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/csel_1.c: New test.
	* gcc.target/aarch64/fcsel_2.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/config/aarch64/aarch64.md              | 13 +++++++----
 gcc/config/aarch64/iterators.md            |  4 ++++
 gcc/testsuite/gcc.target/aarch64/csel_1.c  | 27 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/fcsel_2.c | 20 ++++++++++++++++
 4 files changed, 59 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/csel_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fcsel_2.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bdd443e71d..a6cedd0f1b8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4404,6 +4404,7 @@ (define_insn "*cmov<mode>_insn"
      [ r        , Ui1 , rZ  ; csel        ] csinc\t%<w>0, %<w>4, <w>zr, %M1
      [ r        , UsM , UsM ; mov_imm     ] mov\t%<w>0, -1
      [ r        , Ui1 , Ui1 ; mov_imm     ] mov\t%<w>0, 1
+     [ w        , w   , w   ; fcsel       ] fcsel\t%<wv>0, %<wv>3, %<wv>4, %m1
   }
 )
 
@@ -4464,15 +4465,17 @@ (define_insn "*cmovdi_insn_uxtw"
 )
 
 (define_insn "*cmov<mode>_insn"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
+  [(set (match_operand:GPF 0 "register_operand" "=r,w")
 	(if_then_else:GPF
 	 (match_operator 1 "aarch64_comparison_operator"
 	  [(match_operand 2 "cc_register" "") (const_int 0)])
-	 (match_operand:GPF 3 "register_operand" "w")
-	 (match_operand:GPF 4 "register_operand" "w")))]
+	 (match_operand:GPF 3 "register_operand" "r,w")
+	 (match_operand:GPF 4 "register_operand" "r,w")))]
   "TARGET_FLOAT"
-  "fcsel\\t%<s>0, %<s>3, %<s>4, %m1"
-  [(set_attr "type" "fcsel")]
+  "@
+   csel\t%<single_wx>0, %<single_wx>3, %<single_wx>4, %m1
+   fcsel\\t%<s>0, %<s>3, %<s>4, %m1"
+  [(set_attr "type" "fcsel,csel")]
 )
 
 (define_expand "mov<mode>cc"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 99cde46f1ba..42303f2ec02 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1147,6 +1147,10 @@ (define_mode_attr e [(CCFP "") (CCFPE "e")])
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; For cmov<mode> template to be used with fscel instruction
+(define_mode_attr wv [(QI "s") (HI "s") (SI "s") (DI "d") (SF "s") (DF "d")])
+
+
 ;; The size of access, in bytes.
 (define_mode_attr ldst_sz [(SI "4") (DI "8")])
 ;; Likewise for load/store pair.
diff --git a/gcc/testsuite/gcc.target/aarch64/csel_1.c b/gcc/testsuite/gcc.target/aarch64/csel_1.c
new file mode 100644
index 00000000000..5848e5be2ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csel_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ssa-phiopt" } */
+/* PR target/98477 */
+
+/* We should be able to produce csel followed by a store
+   and not move between the GPRs and simd registers. */
+/* Note -fno-ssa-phiopt is needed, otherwise the tree level
+   does the VCE after the cmov which allowed to use the csel
+   instruction. */
+_Static_assert (sizeof(long long) == sizeof(double));
+void
+foo (int a, double *b, long long c, long long d)
+{
+  double ct;
+  double dt;
+  __builtin_memcpy(&ct, &c, sizeof(long long));
+  __builtin_memcpy(&dt, &d, sizeof(long long));
+  double t = a ? ct : dt;
+  *b = t;
+}
+
+/* { dg-final { scan-assembler-not "\tfcsel\t"  } } */
+/* { dg-final { scan-assembler-times "\tcsel\t" 1 } } */
+/* The store should still happen from the GPRs */
+/* { dg-final { scan-assembler-not "\tstr\td"  } } */
+/* { dg-final { scan-assembler-times "\tstr\tx" 1 } } */
+/* { dg-final { scan-assembler-not "\tfmov\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fcsel_2.c b/gcc/testsuite/gcc.target/aarch64/fcsel_2.c
new file mode 100644
index 00000000000..309e8cbe37f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fcsel_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* PR target/98477 */
+
+#define vector16 __attribute__((vector_size(16)))
+/* We should be able to produce fscel followed by a store
+   and not move between the GPRs and simd registers. */
+void
+foo (int a, int *b, vector16 int c, vector16 int d)
+{
+  int t = a ? c[0] : d[0];
+  *b = t;
+}
+
+/* { dg-final { scan-assembler-times "\tfcsel\t" 1 } } */
+/* { dg-final { scan-assembler-not "\tcsel\t" } } */
+/* The store should still happen from the simd register */
+/* { dg-final { scan-assembler-times "\tstr\ts" 1 } } */
+/* { dg-final { scan-assembler-not "\tstr\tw" } } */
+/* { dg-final { scan-assembler-not "\tfmov\t" } } */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] aarch64: Add fcsel to cmov integer and csel to float cmov [PR98477]
  2024-05-06 18:27 [PATCH] aarch64: Add fcsel to cmov integer and csel to float cmov [PR98477] Andrew Pinski
@ 2024-06-10 11:26 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2024-06-10 11:26 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

Andrew Pinski <quic_apinski@quicinc.com> writes:
> This patch adds an alternative to the integer cmov and one to floating
> point cmov so we avoid in some more moving
>
> 	PR target/98477
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.md (*cmov<mode>_insn[GPI]): Add 'w'
> 	alternative.
> 	(*cmov<mode>_insn[GPF]): Add 'r' alternative.
> 	* config/aarch64/iterators.md (wv): New mode attr.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/csel_1.c: New test.
> 	* gcc.target/aarch64/fcsel_2.c: New test.

This seems a bit dangerous while PR114766 remains unresolved.
The problem (AIUI) is that adding r and w alternatives to the
same pattern means that, when computing the cost of each register
class, r and w are equally cheap for each operand in isolation,
without the interdependencies being modelled.  (This is because
class preferences are calculated on a per-register basis.)

E.g. if:

- insn I1 is op0 = fn(op1, op2)
- I1 provides r and w alternatives
- other uses of op0 and op1 prefer r
- other uses of op2 prefer w

then I1 will not influence the costs of op0, op1 or op2, and so
I1 effectively will provide a zero-cost cross-over between r and w.

There again, we already have other patterns with this problem.  And I
think we do need to make it work somehow.  It's just a question of
whether we should pause adding more "r or w" alternatives until the
problem is fixed, or whether we should treat adding more alternatives
and fixing the RA problem as parallel work.

> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
>  gcc/config/aarch64/aarch64.md              | 13 +++++++----
>  gcc/config/aarch64/iterators.md            |  4 ++++
>  gcc/testsuite/gcc.target/aarch64/csel_1.c  | 27 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/fcsel_2.c | 20 ++++++++++++++++
>  4 files changed, 59 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/csel_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/fcsel_2.c
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 2bdd443e71d..a6cedd0f1b8 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4404,6 +4404,7 @@ (define_insn "*cmov<mode>_insn"
>       [ r        , Ui1 , rZ  ; csel        ] csinc\t%<w>0, %<w>4, <w>zr, %M1
>       [ r        , UsM , UsM ; mov_imm     ] mov\t%<w>0, -1
>       [ r        , Ui1 , Ui1 ; mov_imm     ] mov\t%<w>0, 1
> +     [ w        , w   , w   ; fcsel       ] fcsel\t%<wv>0, %<wv>3, %<wv>4, %m1
>    }
>  )
>  
> @@ -4464,15 +4465,17 @@ (define_insn "*cmovdi_insn_uxtw"
>  )
>  
>  (define_insn "*cmov<mode>_insn"
> -  [(set (match_operand:GPF 0 "register_operand" "=w")
> +  [(set (match_operand:GPF 0 "register_operand" "=r,w")
>  	(if_then_else:GPF
>  	 (match_operator 1 "aarch64_comparison_operator"
>  	  [(match_operand 2 "cc_register" "") (const_int 0)])
> -	 (match_operand:GPF 3 "register_operand" "w")
> -	 (match_operand:GPF 4 "register_operand" "w")))]
> +	 (match_operand:GPF 3 "register_operand" "r,w")
> +	 (match_operand:GPF 4 "register_operand" "r,w")))]
>    "TARGET_FLOAT"
> -  "fcsel\\t%<s>0, %<s>3, %<s>4, %m1"
> -  [(set_attr "type" "fcsel")]
> +  "@
> +   csel\t%<single_wx>0, %<single_wx>3, %<single_wx>4, %m1
> +   fcsel\\t%<s>0, %<s>3, %<s>4, %m1"
> +  [(set_attr "type" "fcsel,csel")]
>  )

I think we should use the new syntax for all new insns with more than
one alternative.

Thanks,
Richard

>  
>  (define_expand "mov<mode>cc"
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 99cde46f1ba..42303f2ec02 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1147,6 +1147,10 @@ (define_mode_attr e [(CCFP "") (CCFPE "e")])
>  ;; 32-bit version and "%x0" in the 64-bit version.
>  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
>  
> +;; For cmov<mode> template to be used with fscel instruction
> +(define_mode_attr wv [(QI "s") (HI "s") (SI "s") (DI "d") (SF "s") (DF "d")])
> +
> +
>  ;; The size of access, in bytes.
>  (define_mode_attr ldst_sz [(SI "4") (DI "8")])
>  ;; Likewise for load/store pair.
> diff --git a/gcc/testsuite/gcc.target/aarch64/csel_1.c b/gcc/testsuite/gcc.target/aarch64/csel_1.c
> new file mode 100644
> index 00000000000..5848e5be2ff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/csel_1.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-ssa-phiopt" } */
> +/* PR target/98477 */
> +
> +/* We should be able to produce csel followed by a store
> +   and not move between the GPRs and simd registers. */
> +/* Note -fno-ssa-phiopt is needed, otherwise the tree level
> +   does the VCE after the cmov which allowed to use the csel
> +   instruction. */
> +_Static_assert (sizeof(long long) == sizeof(double));
> +void
> +foo (int a, double *b, long long c, long long d)
> +{
> +  double ct;
> +  double dt;
> +  __builtin_memcpy(&ct, &c, sizeof(long long));
> +  __builtin_memcpy(&dt, &d, sizeof(long long));
> +  double t = a ? ct : dt;
> +  *b = t;
> +}
> +
> +/* { dg-final { scan-assembler-not "\tfcsel\t"  } } */
> +/* { dg-final { scan-assembler-times "\tcsel\t" 1 } } */
> +/* The store should still happen from the GPRs */
> +/* { dg-final { scan-assembler-not "\tstr\td"  } } */
> +/* { dg-final { scan-assembler-times "\tstr\tx" 1 } } */
> +/* { dg-final { scan-assembler-not "\tfmov\t" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fcsel_2.c b/gcc/testsuite/gcc.target/aarch64/fcsel_2.c
> new file mode 100644
> index 00000000000..309e8cbe37f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fcsel_2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* PR target/98477 */
> +
> +#define vector16 __attribute__((vector_size(16)))
> +/* We should be able to produce fscel followed by a store
> +   and not move between the GPRs and simd registers. */
> +void
> +foo (int a, int *b, vector16 int c, vector16 int d)
> +{
> +  int t = a ? c[0] : d[0];
> +  *b = t;
> +}
> +
> +/* { dg-final { scan-assembler-times "\tfcsel\t" 1 } } */
> +/* { dg-final { scan-assembler-not "\tcsel\t" } } */
> +/* The store should still happen from the simd register */
> +/* { dg-final { scan-assembler-times "\tstr\ts" 1 } } */
> +/* { dg-final { scan-assembler-not "\tstr\tw" } } */
> +/* { dg-final { scan-assembler-not "\tfmov\t" } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-06-10 11:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-06 18:27 [PATCH] aarch64: Add fcsel to cmov integer and csel to float cmov [PR98477] Andrew Pinski
2024-06-10 11:26 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).