From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1652) id F01AE3890435; Mon, 17 May 2021 12:32:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org F01AE3890435 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Christophe Lyon To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-835] arm: Auto-vectorization for MVE: add __fp16 support to VCMP X-Act-Checkin: gcc X-Git-Author: Christophe Lyon X-Git-Refname: refs/heads/master X-Git-Oldrev: a6eacbf1055520e968d1a25f6d30d6ff4b66272d X-Git-Newrev: 7606865198b241b4c944f66761d6506b02ead951 Message-Id: <20210517123214.F01AE3890435@sourceware.org> Date: Mon, 17 May 2021 12:32:14 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 17 May 2021 12:32:15 -0000 https://gcc.gnu.org/g:7606865198b241b4c944f66761d6506b02ead951 commit r12-835-g7606865198b241b4c944f66761d6506b02ead951 Author: Christophe Lyon Date: Mon May 17 12:31:58 2021 +0000 arm: Auto-vectorization for MVE: add __fp16 support to VCMP This patch adds __fp16 support to the previous patch that added vcmp support with MVE. For this we update existing expanders to use VDQWH iterator, and add a new expander vcond<VH_cvtto><mode>. In the process we need to create suitable iterators, and update v_cmp_result as needed. 2021-05-17 Christophe Lyon gcc/ * config/arm/iterators.md (V16): New iterator. (VH_cvtto): New iterator. (v_cmp_result): Added V4HF and V8HF support. * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>): Use VDQWH. (vcond<mode><mode>): Likewise. (vcond_mask_<mode><v_cmp_result>): Likewise. (vcond<VH_cvtto><mode>): New expander. gcc/testsuite/ * gcc.target/arm/simd/mve-compare-3.c: New test with GCC vectors. * gcc.target/arm/simd/mve-vcmp-f16.c: New test for auto-vectorization. * gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust since we now vectorize float16_t vectors. 
Diff: --- gcc/config/arm/iterators.md | 6 ++++ gcc/config/arm/vec-common.md | 40 +++++++++++++++------- .../gcc.target/arm/armv8_2-fp16-arith-1.c | 16 +++++++-- gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c | 38 ++++++++++++++++++++ gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c | 30 ++++++++++++++++ 5 files changed, 116 insertions(+), 14 deletions(-) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index a128465feea..3042bafc6c6 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -231,6 +231,9 @@ ;; Vector modes for 16-bit floating-point support. (define_mode_iterator VH [V8HF V4HF]) +;; Modes with 16-bit elements only. +(define_mode_iterator V16 [V4HI V4HF V8HI V8HF]) + ;; 16-bit floating-point vector modes suitable for moving (includes BFmode). (define_mode_iterator VHFBF [V8HF V4HF V4BF V8BF]) @@ -571,6 +574,8 @@ ;; (Opposite) mode to convert to/from for vector-half mode conversions. (define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI") (V8HI "V8HF") (V8HF "V8HI")]) +(define_mode_attr VH_cvtto [(V4HI "v4hf") (V4HF "v4hi") + (V8HI "v8hf") (V8HF "v8hi")]) ;; Define element mode for each vector mode. 
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") @@ -720,6 +725,7 @@ (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") (V4HI "v4hi") (V8HI "v8hi") (V2SI "v2si") (V4SI "v4si") + (V4HF "v4hi") (V8HF "v8hi") (DI "di") (V2DI "v2di") (V2SF "v2si") (V4SF "v4si")]) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 448731f7be9..265fa40e747 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -366,8 +366,8 @@ (define_expand "vec_cmp<mode><v_cmp_result>" [(set (match_operand:<V_cmp_result> 0 "s_register_operand") (match_operator:<V_cmp_result> 1 "comparison_operator" - [(match_operand:VDQW 2 "s_register_operand") - (match_operand:VDQW 3 "reg_or_zero_operand")]))] + [(match_operand:VDQWH 2 "s_register_operand") + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -399,13 +399,13 @@ ;; element-wise. (define_expand "vcond<mode><mode>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operator 3 "comparison_operator" - [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "reg_or_zero_operand")]) - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + [(match_operand:VDQWH 4 "s_register_operand") + (match_operand:VDQWH 5 "reg_or_zero_operand")]) + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -430,6 +430,22 @@ DONE; }) +(define_expand "vcond<VH_cvtto><mode>" + [(set (match_operand:<VH_CVTTO> 0 "s_register_operand") + (if_then_else:<VH_CVTTO> + (match_operator 3 "comparison_operator" + [(match_operand:V16 4 "s_register_operand") + (match_operand:V16 5 "reg_or_zero_operand")]) + (match_operand:<VH_CVTTO> 1 "s_register_operand") + (match_operand:<VH_CVTTO> 2 "s_register_operand")))] + "ARM_HAVE_<VH_CVTTO>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> 
|| flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + (define_expand "vcondu<mode><v_cmp_result>" [(set (match_operand:VDQW 0 "s_register_operand") (if_then_else:VDQW @@ -446,11 +462,11 @@ }) (define_expand "vcond_mask_<mode><v_cmp_result>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operand:<V_cmp_result> 3 "s_register_operand") - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c index 921d26e2ed4..52b87376dc7 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c @@ -104,8 +104,20 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ + +/* For float16_t. */ +/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */ + +/* For float16x4_t. */ +/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+} 2 } } */ + +/* For float16x8_t. 
*/ +/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+} 2 } } */ /* { dg-final { scan-assembler-not {vadd\.f32} } } */ /* { dg-final { scan-assembler-not {vsub\.f32} } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c new file mode 100644 index 00000000000..76f81e8df92 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +/* float 16 tests. */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE __fp16 +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT16_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* eq, ne, lt, le, gt, ge. 
+/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c new file mode 100644 index 00000000000..dbae2d17515 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c @@ -0,0 +1,30 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +#include <stdint.h> + +#define NB 8 + +#define FUNC(OP, NAME) \ + void test_ ## NAME ##_f (__fp16 * __restrict__ dest, __fp16 *a, __fp16 *b) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = a[i] OP b[i]; \ + } \ + } + +FUNC(==, vcmpeq) +FUNC(!=, vcmpne) +FUNC(<, vcmplt) +FUNC(<=, vcmple) +FUNC(>, vcmpgt) +FUNC(>=, vcmpge) + +/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */