From 276076d8d88f9f84361a500135ba61be30611a2a Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Thu, 15 Jul 2021 13:31:24 +0800 Subject: [PATCH] AVX512FP16: Support basic 64/32bit vector type and operation. For 32bit target, V4HF vector is parsed same as __m64 type, V2HF is parsed by stack and returned from GPR since it is not specified by ABI. gcc/ChangeLog: PR target/102230 * config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Add V2HF mode check. (VALID_SSE2_REG_VHF_MODE): Add V4HFmode and V2HFmode. (VALID_MMX_REG_MODE): Add V4HFmode. (SSE_REG_MODE_P): Replace VALID_AVX512FP16_REG_MODE with vector mode condition. * config/i386/i386.c (classify_argument): Parse V4HF/V2HF via sse regs. (function_arg_32): Add V4HFmode. (function_arg_advance_32): Likewise. * config/i386/i386.md (mode): Add V4HF/V2HF. (MODE_SIZE): Likewise. * config/i386/mmx.md (MMXMODE): Add V4HF mode. (V_32): Add V2HF mode. (VHF_32_64): New mode iterator. (*mov<mode>_internal): Adjust sse alternatives to support V4HF mode move. (*mov<mode>_internal): Adjust sse alternatives to support V2HF mode move. (<insn><mode>3): New define_insn for add/sub/mul/div. gcc/testsuite/ChangeLog: PR target/102230 * gcc.target/i386/avx512fp16-floatvnhf.c: Remove xfail. * gcc.target/i386/avx512fp16-trunc-extendvnhf.c: Ditto. * gcc.target/i386/avx512fp16-truncvnhf.c: Ditto. * gcc.target/i386/avx512fp16-64-32-vecop-1.c: New test. * gcc.target/i386/avx512fp16-64-32-vecop-2.c: Ditto. * gcc.target/i386/pr102230.c: Ditto. 
--- gcc/config/i386/i386.c | 4 + gcc/config/i386/i386.h | 13 +++- gcc/config/i386/i386.md | 5 +- gcc/config/i386/mmx.md | 52 ++++++++++--- .../i386/avx512fp16-64-32-vecop-1.c | 30 ++++++++ .../i386/avx512fp16-64-32-vecop-2.c | 75 +++++++++++++++++++ .../gcc.target/i386/avx512fp16-floatvnhf.c | 12 +-- .../i386/avx512fp16-trunc-extendvnhf.c | 12 +-- .../gcc.target/i386/avx512fp16-truncvnhf.c | 12 +-- gcc/testsuite/gcc.target/i386/pr102230.c | 38 ++++++++++ 10 files changed, 220 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr102230.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ba89e111d28..b3e4add4b9e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2462,6 +2462,8 @@ classify_argument (machine_mode mode, const_tree type, case E_V2SFmode: case E_V2SImode: case E_V4HImode: + case E_V4HFmode: + case E_V2HFmode: case E_V8QImode: classes[0] = X86_64_SSE_CLASS; return 1; @@ -2902,6 +2904,7 @@ pass_in_reg: case E_V8QImode: case E_V4HImode: + case E_V4HFmode: case E_V2SImode: case E_V2SFmode: case E_V1TImode: @@ -3149,6 +3152,7 @@ pass_in_reg: case E_V8QImode: case E_V4HImode: + case E_V4HFmode: case E_V2SImode: case E_V2SFmode: case E_V1TImode: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8a4251b4926..cba6d835910 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1033,7 +1033,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == TImode) #define VALID_AVX512FP16_REG_MODE(MODE) \ - ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode) + ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \ + || (MODE) == V2HFmode) #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ @@ -1041,7 +1042,8 @@ extern const 
char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode) #define VALID_SSE2_REG_VHF_MODE(MODE) \ - (VALID_SSE2_REG_MODE (MODE) || (MODE) == V8HFmode) + (VALID_SSE2_REG_MODE (MODE) || (MODE) == V8HFmode \ + || (MODE) == V4HFmode || (MODE) == V2HFmode) #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == V1TImode || (MODE) == TImode \ @@ -1051,10 +1053,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_MMX_REG_MODE_3DNOW(MODE) \ ((MODE) == V2SFmode || (MODE) == SFmode) +/* To match ia32 psABI, V4HFmode should be added here. */ #define VALID_MMX_REG_MODE(MODE) \ ((MODE) == V1DImode || (MODE) == DImode \ || (MODE) == V2SImode || (MODE) == SImode \ - || (MODE) == V4HImode || (MODE) == V8QImode) + || (MODE) == V4HImode || (MODE) == V8QImode \ + || (MODE) == V4HFmode) #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode) @@ -1087,7 +1091,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ - || (MODE) == V16SFmode || VALID_AVX512FP16_REG_MODE (MODE)) + || (MODE) == V16SFmode || (MODE) == V32HFmode || (MODE) == V16HFmode \ + || (MODE) == V8HFmode) #define X87_FLOAT_MODE_P(MODE) \ (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode)) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c6279e620c9..758d7d1e3c0 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -498,7 +498,7 @@ ;; Main data type used by the insn (define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF, - V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF" + V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF" (const_string "unknown")) ;; The CPU unit operations uses. 
@@ -1106,7 +1106,8 @@ (V1TI "16") (V2TI "32") (V4TI "64") (V2DF "16") (V4DF "32") (V8DF "64") (V4SF "16") (V8SF "32") (V16SF "64") - (V8HF "16") (V16HF "32") (V32HF "64")]) + (V8HF "16") (V16HF "32") (V32HF "64") + (V4HF "8") (V2HF "4")]) ;; Double word integer modes as mode attribute. (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")]) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index b0093778fc6..c9467bc095a 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -48,7 +48,7 @@ (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")]) ;; All 8-byte vector modes handled by MMX -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF]) (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF]) ;; Mix-n-match @@ -57,8 +57,8 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI]) (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) -;; All 4-byte integer vector modes -(define_mode_iterator V_32 [V4QI V2HI V1SI]) +;; All 4-byte integer/float16 vector modes +(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF]) ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) @@ -66,6 +66,9 @@ ;; V2S* modes (define_mode_iterator V2FI [V2SF V2SI]) +;; 4-byte and 8-byte float16 vector modes +(define_mode_iterator VHF_32_64 [V4HF V2HF]) + ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr mmxvecsize [(V8QI "b") (V4QI "b") (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")]) @@ -191,6 +194,8 @@ (eq_attr "alternative" "11,12") (cond [(match_test "<MODE>mode == V2SFmode") (const_string "V4SF") + (match_test "<MODE>mode == V4HFmode") + (const_string "V4SF") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") @@ -198,14 +203,16 @@ (const_string "TI")) (and (eq_attr "alternative" "13") - (ior (and (match_test "<MODE>mode == V2SFmode") - (not (match_test "TARGET_MMX_WITH_SSE"))) - (not (match_test 
"TARGET_SSE2")))) + (ior (ior (and (match_test "<MODE>mode == V2SFmode") + (not (match_test "TARGET_MMX_WITH_SSE"))) + (not (match_test "TARGET_SSE2"))) + (match_test "<MODE>mode == V4HFmode"))) (const_string "V2SF") (and (eq_attr "alternative" "14") - (ior (match_test "<MODE>mode == V2SFmode") - (not (match_test "TARGET_SSE2")))) + (ior (ior (match_test "<MODE>mode == V2SFmode") + (not (match_test "TARGET_SSE2"))) + (match_test "<MODE>mode == V4HFmode"))) (const_string "V2SF") ] (const_string "DI"))) @@ -289,12 +296,17 @@ (const_string "*"))) (set (attr "mode") (cond [(eq_attr "alternative" "2,3") - (cond [(match_test "TARGET_AVX") + (cond [(match_test "<MODE>mode == V2HFmode") + (const_string "V4SF") + (match_test "TARGET_AVX") (const_string "TI") (match_test "optimize_function_for_size_p (cfun)") (const_string "V4SF") ] (const_string "TI")) + (and (eq_attr "alternative" "4,5") + (match_test "<MODE>mode == V2HFmode")) + (const_string "SF") ] (const_string "SI"))) (set (attr "preferred_for_speed") @@ -1391,6 +1403,28 @@ DONE; }) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel half-precision floating point arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "<insn><mode>3" + [(set (match_operand:VHF_32_64 0 "register_operand" "=v") + (plusminusmultdiv:VHF_32_64 + (match_operand:VHF_32_64 1 "register_operand" "v") + (match_operand:VHF_32_64 2 "register_operand" "v")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "v<insn>ph\t{%2, %1, %0|%0, %1, %2}" + [(set (attr "type") + (cond [(match_test "<CODE> == MULT") + (const_string "ssemul") + (match_test "<CODE> == DIV") + (const_string "ssediv")] + (const_string "sseadd"))) + (set_attr "prefix" "evex") + (set_attr "mode" "V8HF")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral arithmetic diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-1.c new file mode 100644 index 
00000000000..754e909d77b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +/* { dg-final { scan-assembler-times "vaddph" 2 } } */ +/* { dg-final { scan-assembler-times "vsubph" 2 } } */ +/* { dg-final { scan-assembler-times "vmulph" 2 } } */ +/* { dg-final { scan-assembler-times "vdivph" 2 } } */ + +#define DO_PRAGMA(X) _Pragma(#X) + +#define VEC_OP_VV(size, op, name) \ +void \ +__attribute__ ((noinline, noclone, optimize("tree-slp-vectorize"))) \ +vecop_v##size##hf##name (_Float16 * restrict dst, \ + _Float16 * restrict src1, _Float16 * restrict src2) \ +{ \ + int i; \ + DO_PRAGMA (GCC unroll size) \ + for (i = 0; i < size; i++) \ + dst[i] = src1[i] op src2[i]; \ +} + +VEC_OP_VV(4, +, add) +VEC_OP_VV(2, +, add) +VEC_OP_VV(4, -, sub) +VEC_OP_VV(2, -, sub) +VEC_OP_VV(4, *, mul) +VEC_OP_VV(2, *, mul) +VEC_OP_VV(4, /, div) +VEC_OP_VV(2, /, div) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-2.c new file mode 100644 index 00000000000..4dc6f9fb92e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-64-32-vecop-2.c @@ -0,0 +1,75 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +static void vec_op_test (void); +#define DO_TEST vec_op_test +#define AVX512FP16 +#define AVX512VL +#include "avx512f-check.h" +#include "avx512fp16-64-32-vecop-1.c" + +_Float16 a[4], b[4], fexp[4], fref[4]; + +#define EMULATE_VEC_OP_VV(size, op, name) \ +void \ +__attribute__ ((noinline, noclone)) \ +scalar_vecop_v##size##hf##name ( \ + _Float16 * restrict dst, _Float16 * restrict src1, \ + _Float16 * restrict src2) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + dst[i] = src1[i] op src2[i]; \ +} + +EMULATE_VEC_OP_VV (4, +, add) +EMULATE_VEC_OP_VV (2, +, add) +EMULATE_VEC_OP_VV (4, -, sub) +EMULATE_VEC_OP_VV (2, -, sub) 
+EMULATE_VEC_OP_VV (4, *, mul) +EMULATE_VEC_OP_VV (2, *, mul) +EMULATE_VEC_OP_VV (4, /, div) +EMULATE_VEC_OP_VV (2, /, div) + +void init() +{ + int i; + for (i = 0; i < 4; i++) + { + a[i] = i + 0.5; + b[i] = i * 1.5; + fexp[i] = fref[i] = 2.75 * i; + } +} + +int check_cond(void *a, void *b, int size) +{ + int i; + unsigned short *pa = (unsigned short *)a, + *pb = (unsigned short *)b; + for (i = 0; i < size; i++) + if (pa[i] != pb[i]) + return 0; + return 1; +} + +#define TEST_VEC_OP_VV(size, name) \ +{ \ + init (); \ + scalar_vecop_v##size##hf##name (a, b, fexp); \ + vecop_v##size##hf##name (a, b, fref); \ + if (!check_cond ((void *)fexp, (void *)fref, size)) \ + abort(); \ +} + +static void vec_op_test() +{ + TEST_VEC_OP_VV (4, add) + TEST_VEC_OP_VV (2, add) + TEST_VEC_OP_VV (4, sub) + TEST_VEC_OP_VV (2, sub) + TEST_VEC_OP_VV (4, mul) + TEST_VEC_OP_VV (2, mul) + TEST_VEC_OP_VV (4, div) + TEST_VEC_OP_VV (2, div) +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c index 112ac3e74d5..8471a1d1d10 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c @@ -43,16 +43,16 @@ FLOATHFVV(2, udi) /* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail 
*-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c index 286ea9f2624..2ef901a0375 100644 --- 
a/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c @@ -41,15 +41,15 @@ EXTENDHFVV(8, sf) EXTENDHFVV(4, sf) /* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ 
\\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c index ee55cd12300..7a51c9dd077 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c @@ -43,16 +43,16 @@ FIX_TRUNCHFVV(2, udi) /* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2dq\[ 
\\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102230.c b/gcc/testsuite/gcc.target/i386/pr102230.c new file mode 100644 index 00000000000..60cf1c32afe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102230.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +typedef _Float16 v4hf __attribute__ ((vector_size (8))); +typedef _Float16 v2hf __attribute__ ((vector_size (4))); + +v4hf +v4hf_abi_1 (v4hf a) +{ + return a; +} + +v4hf +v4hf_abi_3 (v4hf a, v4hf b, v4hf c) +{ + return c; +} + +/* { dg-final { scan-assembler-times "movq\[[\\t \]*%mm2, %mm0" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovaps\[[\\t \]*%xmm2, %xmm0" 1 { target { ! 
ia32 } } } } */ + +v4hf +v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d) +{ + return d; +} + +/* { dg-final { scan-assembler-times "movq\[[\\t \]*4\[(\]%esp\[)\], %mm0" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovaps\[[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */ + +v2hf +v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d) +{ + return b; +} + +/* { dg-final { scan-assembler-times "movl\[[\\t \]*8\[(\]%esp\[)\], %eax" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovaps\[[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */ -- 2.27.1