public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]AArch64 relax constraints on FP16 insn PR108172
@ 2022-12-20 21:32 Tamar Christina
  2022-12-21 10:31 ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2022-12-20 21:32 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 3487 bytes --]

Hi All,

The move, load, load, store, dup, basically all the non arithmetic FP16
instructions use baseline armv8-a HF support, and so do not require the
Armv8.2-a extensions.  This relaxes the instructions.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR target/108172
	* config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): Relax
	TARGET_SIMD_F16INST to TARGET_SIMD.
	* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Re-order.
	* config/aarch64/iterators.md (VMOVE): Drop TARGET_SIMD_F16INST.

gcc/testsuite/ChangeLog:

	PR target/108172
	* gcc.target/aarch64/pr108172.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a62b6deaf4a57e570074d7d894e6fac13779f6fb..8a9f655d547285ec7bdc173086308d7d44a8d482 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -164,7 +164,7 @@ (define_insn "*aarch64_simd_movv2hf"
 		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
 	(match_operand:V2HF 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
-  "TARGET_SIMD_F16INST
+  "TARGET_SIMD
    && (register_operand (operands[0], V2HFmode)
        || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
    "@
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 73515c174fa4fe7830527e7eabd91c4648130ff4..d1c0476321b79bc6aded350d24ea5d556c796519 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3616,6 +3616,8 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V4x2DFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
 
+    /* 32-bit Advanced SIMD vectors.  */
+    case E_V2HFmode:
     /* 64-bit Advanced SIMD vectors.  */
     case E_V8QImode:
     case E_V4HImode:
@@ -3634,7 +3636,6 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
-    case E_V2HFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD : 0;
 
     default:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a521dbde1ec42c0c442a9ca3dd52c9727d116399..70742520984d30158e62a38c92abea82b2dac059 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -204,8 +204,7 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
 ;; All Advanced SIMD modes suitable for moving, loading, and storing
 ;; including V2HF
 (define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
-			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
-			     (V2HF "TARGET_SIMD_F16INST")])
+			     V2HF V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
 
 
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr108172.c b/gcc/testsuite/gcc.target/aarch64/pr108172.c
new file mode 100644
index 0000000000000000000000000000000000000000..b29054fdb1d6e602755bc93089f1edec4eb53b8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr108172.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+typedef _Float16 v4hf __attribute__ ((vector_size (8)));
+typedef _Float16 v2hf __attribute__ ((vector_size (4)));
+
+v4hf
+v4hf_abi_1 (v4hf a)
+{
+  return a;
+}
+
+v4hf
+v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
+{
+  return c;
+}
+
+v4hf
+v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d)
+{
+  return d;
+}
+
+v2hf
+v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d)
+{
+  return b;
+}
+




-- 

[-- Attachment #2: rb16717.patch --]
[-- Type: text/plain, Size: 2786 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a62b6deaf4a57e570074d7d894e6fac13779f6fb..8a9f655d547285ec7bdc173086308d7d44a8d482 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -164,7 +164,7 @@ (define_insn "*aarch64_simd_movv2hf"
 		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
 	(match_operand:V2HF 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
-  "TARGET_SIMD_F16INST
+  "TARGET_SIMD
    && (register_operand (operands[0], V2HFmode)
        || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
    "@
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 73515c174fa4fe7830527e7eabd91c4648130ff4..d1c0476321b79bc6aded350d24ea5d556c796519 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3616,6 +3616,8 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V4x2DFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
 
+    /* 32-bit Advanced SIMD vectors.  */
+    case E_V2HFmode:
     /* 64-bit Advanced SIMD vectors.  */
     case E_V8QImode:
     case E_V4HImode:
@@ -3634,7 +3636,6 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
-    case E_V2HFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD : 0;
 
     default:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a521dbde1ec42c0c442a9ca3dd52c9727d116399..70742520984d30158e62a38c92abea82b2dac059 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -204,8 +204,7 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
 ;; All Advanced SIMD modes suitable for moving, loading, and storing
 ;; including V2HF
 (define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
-			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
-			     (V2HF "TARGET_SIMD_F16INST")])
+			     V2HF V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
 
 
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr108172.c b/gcc/testsuite/gcc.target/aarch64/pr108172.c
new file mode 100644
index 0000000000000000000000000000000000000000..b29054fdb1d6e602755bc93089f1edec4eb53b8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr108172.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+typedef _Float16 v4hf __attribute__ ((vector_size (8)));
+typedef _Float16 v2hf __attribute__ ((vector_size (4)));
+
+v4hf
+v4hf_abi_1 (v4hf a)
+{
+  return a;
+}
+
+v4hf
+v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
+{
+  return c;
+}
+
+v4hf
+v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d)
+{
+  return d;
+}
+
+v2hf
+v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d)
+{
+  return b;
+}
+




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]AArch64 relax constraints on FP16 insn PR108172
  2022-12-20 21:32 [PATCH]AArch64 relax constraints on FP16 insn PR108172 Tamar Christina
@ 2022-12-21 10:31 ` Richard Sandiford
  2022-12-21 10:49   ` Tamar Christina
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2022-12-21 10:31 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov

Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> The move, load, load, store, dup, basically all the non arithmetic FP16
> instructions use baseline armv8-a HF support, and so do not require the
> Armv8.2-a extensions.  This relaxes the instructions.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR target/108172
> 	* config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): Relax
> 	TARGET_SIMD_F16INST to TARGET_SIMD.
> 	* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Re-order.
> 	* config/aarch64/iterators.md (VMOVE): Drop TARGET_SIMD_F16INST.
>
> gcc/testsuite/ChangeLog:
>
> 	PR target/108172
> 	* gcc.target/aarch64/pr108172.c: New test.

OK, thanks.

I think we need better tests for this area though.  The VMOVE uses
include aarch64_dup_lane<mode>, which uses <Vtype>, which has no
definition for V2HF, so we get:

    return "dup\t%0.<Vtype>, %1.h[%2]";

The original patch defined Vtype to "2h", but using that here would
have generated an invalid instruction.

We also have unexpanded <Vtype>s in the pairwise operations:

    "faddp\t%h0, %1.<Vtype>",
    "fmaxp\t%h0, %1.<Vtype>",
    "fminp\t%h0, %1.<Vtype>",
    "fmaxnmp\t%h0, %1.<Vtype>",
    "fminnmp\t%h0, %1.<Vtype>",

Would it be easy (using a combination of this patch and a follow-on patch)
to wind the V2HF support back to a state that makes sense on its own,
without the postponed pairwise support?  Or would it be simpler to
revert 2cba118e538ba0b7582af7f9fb5ba2dfbb772f8e for GCC 13 and revisit
this in GCC 14, alongside the original motivating use case?

Richard

> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index a62b6deaf4a57e570074d7d894e6fac13779f6fb..8a9f655d547285ec7bdc173086308d7d44a8d482 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -164,7 +164,7 @@ (define_insn "*aarch64_simd_movv2hf"
>  		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
>  	(match_operand:V2HF 1 "general_operand"
>  		"m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
> -  "TARGET_SIMD_F16INST
> +  "TARGET_SIMD
>     && (register_operand (operands[0], V2HFmode)
>         || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
>     "@
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 73515c174fa4fe7830527e7eabd91c4648130ff4..d1c0476321b79bc6aded350d24ea5d556c796519 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -3616,6 +3616,8 @@ aarch64_classify_vector_mode (machine_mode mode)
>      case E_V4x2DFmode:
>        return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
>  
> +    /* 32-bit Advanced SIMD vectors.  */
> +    case E_V2HFmode:
>      /* 64-bit Advanced SIMD vectors.  */
>      case E_V8QImode:
>      case E_V4HImode:
> @@ -3634,7 +3636,6 @@ aarch64_classify_vector_mode (machine_mode mode)
>      case E_V8BFmode:
>      case E_V4SFmode:
>      case E_V2DFmode:
> -    case E_V2HFmode:
>        return TARGET_FLOAT ? VEC_ADVSIMD : 0;
>  
>      default:
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index a521dbde1ec42c0c442a9ca3dd52c9727d116399..70742520984d30158e62a38c92abea82b2dac059 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -204,8 +204,7 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
>  ;; All Advanced SIMD modes suitable for moving, loading, and storing
>  ;; including V2HF
>  (define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
> -			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
> -			     (V2HF "TARGET_SIMD_F16INST")])
> +			     V2HF V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
>  
>  
>  ;; The VALL_F16 modes except the 128-bit 2-element ones.
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr108172.c b/gcc/testsuite/gcc.target/aarch64/pr108172.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b29054fdb1d6e602755bc93089f1edec4eb53b8e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr108172.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0" } */
> +
> +typedef _Float16 v4hf __attribute__ ((vector_size (8)));
> +typedef _Float16 v2hf __attribute__ ((vector_size (4)));
> +
> +v4hf
> +v4hf_abi_1 (v4hf a)
> +{
> +  return a;
> +}
> +
> +v4hf
> +v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
> +{
> +  return c;
> +}
> +
> +v4hf
> +v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d)
> +{
> +  return d;
> +}
> +
> +v2hf
> +v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d)
> +{
> +  return b;
> +}
> +

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH]AArch64 relax constraints on FP16 insn PR108172
  2022-12-21 10:31 ` Richard Sandiford
@ 2022-12-21 10:49   ` Tamar Christina
  2022-12-21 11:09     ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2022-12-21 10:49 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Wednesday, December 21, 2022 10:31 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH]AArch64 relax constraints on FP16 insn PR108172
> 
> Tamar Christina <tamar.christina@arm.com> writes:
> > Hi All,
> >
> > The move, load, load, store, dup, basically all the non arithmetic
> > FP16 instructions use baseline armv8-a HF support, and so do not
> > require the Armv8.2-a extensions.  This relaxes the instructions.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	PR target/108172
> > 	* config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): Relax
> > 	TARGET_SIMD_F16INST to TARGET_SIMD.
> > 	* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Re-
> order.
> > 	* config/aarch64/iterators.md (VMOVE): Drop
> TARGET_SIMD_F16INST.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	PR target/108172
> > 	* gcc.target/aarch64/pr108172.c: New test.
> 
> OK, thanks.
> 
> I think we need better tests for this area though.  The VMOVE uses include
> aarch64_dup_lane<mode>, which uses <Vtype>, which has no definition for
> V2HF, so we get:
> 
>     return "dup\t%0.<Vtype>, %1.h[%2]";
> 
> The original patch defined Vtype to "2h", but using that here would have
> generated an invalid instruction.
> 
> We also have unexpanded <Vtype>s in the pairwise operations:
> 
>     "faddp\t%h0, %1.<Vtype>",
>     "fmaxp\t%h0, %1.<Vtype>",
>     "fminp\t%h0, %1.<Vtype>",
>     "fmaxnmp\t%h0, %1.<Vtype>",
>     "fminnmp\t%h0, %1.<Vtype>",
> 

Right, the pairwise bit should have been reverted, the tests were all In that patch.

> Would it be easy (using a combination of this patch and a follow-on patch) to
> wind the V2HF support back to a state that makes sense on its own, without
> the postponed pairwise support?  Or would it be simpler to revert
> 2cba118e538ba0b7582af7f9fb5ba2dfbb772f8e for GCC 13 and revisit this in
> GCC 14, alongside the original motivating use case?

The pairwise and the dup should be undone, as evidence that they don't trigger
currently. The mov make sense on their own already and improve various codegen
around shorts on their own.

I won't be revisiting pairwise operations again, so I'll remove the remnants from this
Patch.

> 
> Richard
> 
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index
> >
> a62b6deaf4a57e570074d7d894e6fac13779f6fb..8a9f655d547285ec7bdc173086
> 30
> > 8d7d44a8d482 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -164,7 +164,7 @@ (define_insn "*aarch64_simd_movv2hf"
> >  		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
> >  	(match_operand:V2HF 1 "general_operand"
> >  		"m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
> > -  "TARGET_SIMD_F16INST
> > +  "TARGET_SIMD
> >     && (register_operand (operands[0], V2HFmode)
> >         || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
> >     "@
> > diff --git a/gcc/config/aarch64/aarch64.cc
> > b/gcc/config/aarch64/aarch64.cc index
> >
> 73515c174fa4fe7830527e7eabd91c4648130ff4..d1c0476321b79bc6aded350d24
> ea
> > 5d556c796519 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -3616,6 +3616,8 @@ aarch64_classify_vector_mode (machine_mode
> mode)
> >      case E_V4x2DFmode:
> >        return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
> >
> > +    /* 32-bit Advanced SIMD vectors.  */
> > +    case E_V2HFmode:
> >      /* 64-bit Advanced SIMD vectors.  */
> >      case E_V8QImode:
> >      case E_V4HImode:
> > @@ -3634,7 +3636,6 @@ aarch64_classify_vector_mode (machine_mode
> mode)
> >      case E_V8BFmode:
> >      case E_V4SFmode:
> >      case E_V2DFmode:
> > -    case E_V2HFmode:
> >        return TARGET_FLOAT ? VEC_ADVSIMD : 0;
> >
> >      default:
> > diff --git a/gcc/config/aarch64/iterators.md
> > b/gcc/config/aarch64/iterators.md index
> >
> a521dbde1ec42c0c442a9ca3dd52c9727d116399..70742520984d30158e62a38c9
> 2ab
> > ea82b2dac059 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -204,8 +204,7 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI
> > V8HI V2SI V4SI V2DI  ;; All Advanced SIMD modes suitable for moving,
> > loading, and storing  ;; including V2HF  (define_mode_iterator VMOVE
> > [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
> > -			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
> > -			     (V2HF "TARGET_SIMD_F16INST")])
> > +			     V2HF V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
> >
> >
> >  ;; The VALL_F16 modes except the 128-bit 2-element ones.
> > diff --git a/gcc/testsuite/gcc.target/aarch64/pr108172.c
> > b/gcc/testsuite/gcc.target/aarch64/pr108172.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..b29054fdb1d6e602755bc9308
> 9f1
> > edec4eb53b8e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/pr108172.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O0" } */
> > +
> > +typedef _Float16 v4hf __attribute__ ((vector_size (8))); typedef
> > +_Float16 v2hf __attribute__ ((vector_size (4)));
> > +
> > +v4hf
> > +v4hf_abi_1 (v4hf a)
> > +{
> > +  return a;
> > +}
> > +
> > +v4hf
> > +v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
> > +{
> > +  return c;
> > +}
> > +
> > +v4hf
> > +v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d) {
> > +  return d;
> > +}
> > +
> > +v2hf
> > +v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d) {
> > +  return b;
> > +}
> > +

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]AArch64 relax constraints on FP16 insn PR108172
  2022-12-21 10:49   ` Tamar Christina
@ 2022-12-21 11:09     ` Richard Sandiford
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2022-12-21 11:09 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Wednesday, December 21, 2022 10:31 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
>> Subject: Re: [PATCH]AArch64 relax constraints on FP16 insn PR108172
>> 
>> Tamar Christina <tamar.christina@arm.com> writes:
>> > Hi All,
>> >
>> > The move, load, load, store, dup, basically all the non arithmetic
>> > FP16 instructions use baseline armv8-a HF support, and so do not
>> > require the Armv8.2-a extensions.  This relaxes the instructions.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> > gcc/ChangeLog:
>> >
>> > 	PR target/108172
>> > 	* config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): Relax
>> > 	TARGET_SIMD_F16INST to TARGET_SIMD.
>> > 	* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Re-
>> order.
>> > 	* config/aarch64/iterators.md (VMOVE): Drop
>> TARGET_SIMD_F16INST.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 	PR target/108172
>> > 	* gcc.target/aarch64/pr108172.c: New test.
>> 
>> OK, thanks.
>> 
>> I think we need better tests for this area though.  The VMOVE uses include
>> aarch64_dup_lane<mode>, which uses <Vtype>, which has no definition for
>> V2HF, so we get:
>> 
>>     return "dup\t%0.<Vtype>, %1.h[%2]";
>> 
>> The original patch defined Vtype to "2h", but using that here would have
>> generated an invalid instruction.
>> 
>> We also have unexpanded <Vtype>s in the pairwise operations:
>> 
>>     "faddp\t%h0, %1.<Vtype>",
>>     "fmaxp\t%h0, %1.<Vtype>",
>>     "fminp\t%h0, %1.<Vtype>",
>>     "fmaxnmp\t%h0, %1.<Vtype>",
>>     "fminnmp\t%h0, %1.<Vtype>",
>> 
>
> Right, the pairwise bit should have been reverted, the tests were all In that patch.
>
>> Would it be easy (using a combination of this patch and a follow-on patch) to
>> wind the V2HF support back to a state that makes sense on its own, without
>> the postponed pairwise support?  Or would it be simpler to revert
>> 2cba118e538ba0b7582af7f9fb5ba2dfbb772f8e for GCC 13 and revisit this in
>> GCC 14, alongside the original motivating use case?
>
> The pairwise and the dup should be undone, as evidence that they don't trigger
> currently. The mov make sense on their own already and improve various codegen
> around shorts on their own.

OK, sounds good, thanks.

> I won't be revisiting pairwise operations again, so I'll remove the remnants from this
> Patch.

Ack.  Sorry for causing frustration in the review for that series.

Richard

>> Richard
>> 
>> > --- inline copy of patch --
>> > diff --git a/gcc/config/aarch64/aarch64-simd.md
>> > b/gcc/config/aarch64/aarch64-simd.md
>> > index
>> >
>> a62b6deaf4a57e570074d7d894e6fac13779f6fb..8a9f655d547285ec7bdc173086
>> 30
>> > 8d7d44a8d482 100644
>> > --- a/gcc/config/aarch64/aarch64-simd.md
>> > +++ b/gcc/config/aarch64/aarch64-simd.md
>> > @@ -164,7 +164,7 @@ (define_insn "*aarch64_simd_movv2hf"
>> >  		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
>> >  	(match_operand:V2HF 1 "general_operand"
>> >  		"m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
>> > -  "TARGET_SIMD_F16INST
>> > +  "TARGET_SIMD
>> >     && (register_operand (operands[0], V2HFmode)
>> >         || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
>> >     "@
>> > diff --git a/gcc/config/aarch64/aarch64.cc
>> > b/gcc/config/aarch64/aarch64.cc index
>> >
>> 73515c174fa4fe7830527e7eabd91c4648130ff4..d1c0476321b79bc6aded350d24
>> ea
>> > 5d556c796519 100644
>> > --- a/gcc/config/aarch64/aarch64.cc
>> > +++ b/gcc/config/aarch64/aarch64.cc
>> > @@ -3616,6 +3616,8 @@ aarch64_classify_vector_mode (machine_mode
>> mode)
>> >      case E_V4x2DFmode:
>> >        return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
>> >
>> > +    /* 32-bit Advanced SIMD vectors.  */
>> > +    case E_V2HFmode:
>> >      /* 64-bit Advanced SIMD vectors.  */
>> >      case E_V8QImode:
>> >      case E_V4HImode:
>> > @@ -3634,7 +3636,6 @@ aarch64_classify_vector_mode (machine_mode
>> mode)
>> >      case E_V8BFmode:
>> >      case E_V4SFmode:
>> >      case E_V2DFmode:
>> > -    case E_V2HFmode:
>> >        return TARGET_FLOAT ? VEC_ADVSIMD : 0;
>> >
>> >      default:
>> > diff --git a/gcc/config/aarch64/iterators.md
>> > b/gcc/config/aarch64/iterators.md index
>> >
>> a521dbde1ec42c0c442a9ca3dd52c9727d116399..70742520984d30158e62a38c9
>> 2ab
>> > ea82b2dac059 100644
>> > --- a/gcc/config/aarch64/iterators.md
>> > +++ b/gcc/config/aarch64/iterators.md
>> > @@ -204,8 +204,7 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI
>> > V8HI V2SI V4SI V2DI  ;; All Advanced SIMD modes suitable for moving,
>> > loading, and storing  ;; including V2HF  (define_mode_iterator VMOVE
>> > [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
>> > -			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
>> > -			     (V2HF "TARGET_SIMD_F16INST")])
>> > +			     V2HF V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
>> >
>> >
>> >  ;; The VALL_F16 modes except the 128-bit 2-element ones.
>> > diff --git a/gcc/testsuite/gcc.target/aarch64/pr108172.c
>> > b/gcc/testsuite/gcc.target/aarch64/pr108172.c
>> > new file mode 100644
>> > index
>> >
>> 0000000000000000000000000000000000000000..b29054fdb1d6e602755bc9308
>> 9f1
>> > edec4eb53b8e
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/pr108172.c
>> > @@ -0,0 +1,30 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O0" } */
>> > +
>> > +typedef _Float16 v4hf __attribute__ ((vector_size (8))); typedef
>> > +_Float16 v2hf __attribute__ ((vector_size (4)));
>> > +
>> > +v4hf
>> > +v4hf_abi_1 (v4hf a)
>> > +{
>> > +  return a;
>> > +}
>> > +
>> > +v4hf
>> > +v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
>> > +{
>> > +  return c;
>> > +}
>> > +
>> > +v4hf
>> > +v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d) {
>> > +  return d;
>> > +}
>> > +
>> > +v2hf
>> > +v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d) {
>> > +  return b;
>> > +}
>> > +

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-12-21 11:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-20 21:32 [PATCH]AArch64 relax constraints on FP16 insn PR108172 Tamar Christina
2022-12-21 10:31 ` Richard Sandiford
2022-12-21 10:49   ` Tamar Christina
2022-12-21 11:09     ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).