public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3437] AVX512FP16: Enable _Float16 autovectorization
@ 2021-09-10 7:00 hongtao Liu
0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-09-10 7:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d959312b429971d69521e91506e304f8fa271a2d
commit r12-3437-gd959312b429971d69521e91506e304f8fa271a2d
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Jan 27 19:38:02 2019 -0800
AVX512FP16: Enable _Float16 autovectorization
gcc/ChangeLog:
* config/i386/i386-expand.c
(ix86_avx256_split_vector_move_misalign): Handle V16HF mode.
* config/i386/i386.c
(ix86_preferred_simd_mode): Handle HF mode.
* config/i386/sse.md (V_256H): New mode iterator.
(avx_vextractf128<mode>): Use it.
(VEC_INIT_MODE): Align vector HFmode condition to vector
HImodes since there are no real HF instructions used.
(VEC_INIT_HALF_MODE): Ditto.
(VIHF): Ditto.
(VIHF_AVX512BW): Ditto.
(*vec_extracthf): Ditto.
(VEC_EXTRACT_MODE): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/vect-float16-1.c: New test.
* gcc.target/i386/vect-float16-10.c: Ditto.
* gcc.target/i386/vect-float16-11.c: Ditto.
* gcc.target/i386/vect-float16-12.c: Ditto.
* gcc.target/i386/vect-float16-2.c: Ditto.
* gcc.target/i386/vect-float16-3.c: Ditto.
* gcc.target/i386/vect-float16-4.c: Ditto.
* gcc.target/i386/vect-float16-5.c: Ditto.
* gcc.target/i386/vect-float16-6.c: Ditto.
* gcc.target/i386/vect-float16-7.c: Ditto.
* gcc.target/i386/vect-float16-8.c: Ditto.
* gcc.target/i386/vect-float16-9.c: Ditto.
Diff:
---
gcc/config/i386/i386-expand.c | 4 ++++
gcc/config/i386/i386.c | 14 ++++++++++++++
gcc/config/i386/sse.md | 24 ++++++++++++------------
gcc/testsuite/gcc.target/i386/vect-float16-1.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-10.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-11.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-12.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-2.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-3.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-4.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-5.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-6.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-7.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-8.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/vect-float16-9.c | 14 ++++++++++++++
15 files changed, 198 insertions(+), 12 deletions(-)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 0c1aec585fe..cac8354a067 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
extract = gen_avx_vextractf128v32qi;
mode = V16QImode;
break;
+ case E_V16HFmode:
+ extract = gen_avx_vextractf128v16hf;
+ mode = V8HFmode;
+ break;
case E_V8SFmode:
extract = gen_avx_vextractf128v8sf;
mode = V4SFmode;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dc649f96d0d..7b173bc0beb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
else
return V2DImode;
+ case E_HFmode:
+ if (TARGET_AVX512FP16)
+ {
+ if (TARGET_AVX512VL)
+ {
+ if (TARGET_PREFER_AVX128)
+ return V8HFmode;
+ else if (TARGET_PREFER_AVX256)
+ return V16HFmode;
+ }
+ return V32HFmode;
+ }
+ return word_mode;
+
case E_SFmode:
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SFmode;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06339163bc5..26024609e2b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,6 +279,10 @@
(define_mode_iterator V_256
[V32QI V16HI V8SI V4DI V8SF V4DF])
+;; All 256bit vector modes including HF vector mode
+(define_mode_iterator V_256H
+ [V32QI V16HI V8SI V4DI V8SF V4DF V16HF])
+
;; All 128bit and 256bit vector modes
(define_mode_iterator V_128_256
[V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
@@ -406,8 +410,7 @@
(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")])
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF])
(define_mode_iterator VI_AVX2
[(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
@@ -752,7 +755,7 @@
[V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
(define_mode_iterator VIHF_AVX512BW
[V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
- (V32HF "TARGET_AVX512FP16")])
+ (V32HF "TARGET_AVX512BW")])
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -9381,7 +9384,7 @@
(define_expand "avx_vextractf128<mode>"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
- (match_operand:V_256 1 "register_operand")
+ (match_operand:V_256H 1 "register_operand")
(match_operand:SI 2 "const_0_to_1_operand")]
"TARGET_AVX"
{
@@ -9868,7 +9871,7 @@
(match_operand:V8HF 1 "register_operand" "v,v")
(parallel
[(match_operand:SI 2 "const_0_to_7_operand")])))]
- "TARGET_AVX512FP16"
+ "TARGET_SSE2"
"@
vpextrw\t{%2, %1, %k0|%k0, %1, %2}
vpextrw\t{%2, %1, %0|%0, %1, %2}"
@@ -9882,8 +9885,7 @@
(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -15615,7 +15617,7 @@
;; Modes handled by pinsr patterns.
(define_mode_iterator PINSR_MODE
- [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16")
+ [(V16QI "TARGET_SSE4_1") V8HI V8HF
(V4SI "TARGET_SSE4_1")
(V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
@@ -23723,8 +23725,7 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -23736,8 +23737,7 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
- (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V4TI "TARGET_AVX512F")])
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
new file mode 100644
index 00000000000..0f82cf94932
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
new file mode 100644
index 00000000000..217645692ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
new file mode 100644
index 00000000000..e0409ce9d3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 128; i++)
+ a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
new file mode 100644
index 00000000000..d92a25dc255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
new file mode 100644
index 00000000000..974fca4ce09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 128; i++)
+ a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
new file mode 100644
index 00000000000..9bca9142df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
new file mode 100644
index 00000000000..e6f26f0aa40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
new file mode 100644
index 00000000000..38f287b1dc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 128; i++)
+ a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
new file mode 100644
index 00000000000..bc9f7870061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
new file mode 100644
index 00000000000..b4849cf77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
new file mode 100644
index 00000000000..71631b17cc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 128; i++)
+ a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
new file mode 100644
index 00000000000..1be5c7f022f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types. */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+ _Float16 *__restrict__ c)
+{
+ for (int i = 0; i < 256; i++)
+ a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-09-10 7:00 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-10 7:00 [gcc r12-3437] AVX512FP16: Enable _Float16 autovectorization hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).