[gcc r14-422] aarch64: PR target/99195 annotate simple floating-point patterns for vec-concat with zero

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r14-422] aarch64: PR target/99195 annotate simple floating-point patterns for vec-concat with zero
@ 2023-05-03 10:16 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-03 10:16 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1133cfab47258c147fcb2d453465d10e72acbfd9

commit r14-422-g1133cfab47258c147fcb2d453465d10e72acbfd9
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed May 3 11:15:34 2023 +0100

    aarch64: PR target/99195 annotate simple floating-point patterns for vec-concat with zero
    
    Continuing the, almost mechanical, series this patch adds annotation for some of the simple
    floating-point patterns we have, and adds testing to ensure that redundant zeroing instructions
    are eliminated.
    
    Bootstrapped and tested on aarch64-none-linux-gnu and also aarch64_be-none-elf.
    
    gcc/ChangeLog:
    
            PR target/99195
            * config/aarch64/aarch64-simd.md (add<mode>3): Rename to...
            (add<mode>3<vczle><vczbe>): ... This.
            (sub<mode>3): Rename to...
            (sub<mode>3<vczle><vczbe>): ... This.
            (mul<mode>3): Rename to...
            (mul<mode>3<vczle><vczbe>): ... This.
            (*div<mode>3): Rename to...
            (*div<mode>3<vczle><vczbe>): ... This.
            (neg<mode>2): Rename to...
            (neg<mode>2<vczle><vczbe>): ... This.
            (abs<mode>2): Rename to...
            (abs<mode>2<vczle><vczbe>): ... This.
            (<frint_pattern><mode>2): Rename to...
            (<frint_pattern><mode>2<vczle><vczbe>): ... This.
            (<fmaxmin><mode>3): Rename to...
            (<fmaxmin><mode>3<vczle><vczbe>): ... This.
            (*sqrt<mode>2): Rename to...
            (*sqrt<mode>2<vczle><vczbe>): ... This.
    
    gcc/testsuite/ChangeLog:
    
            PR target/99195
            * gcc.target/aarch64/simd/pr99195_1.c: Add testing for some unary
            and binary floating-point ops.
            * gcc.target/aarch64/simd/pr99195_2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                | 18 +++---
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 11 ++++
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c | 72 +++++++++++++++++++++++
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0a1f12cc3d0..9ba435fd252 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2846,7 +2846,7 @@
 
 ;; FP arithmetic operations.
 
-(define_insn "add<mode>3"
+(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		   (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2855,7 +2855,7 @@
   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
 )
 
-(define_insn "sub<mode>3"
+(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		    (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2864,7 +2864,7 @@
   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
 )
 
-(define_insn "mul<mode>3"
+(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		   (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2885,7 +2885,7 @@
   operands[1] = force_reg (<MODE>mode, operands[1]);
 })
 
-(define_insn "*div<mode>3"
+(define_insn "*div<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		 (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2915,7 +2915,7 @@
   }
 )
 
-(define_insn "neg<mode>2"
+(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
@@ -2923,7 +2923,7 @@
   [(set_attr "type" "neon_fp_neg_<stype><q>")]
 )
 
-(define_insn "abs<mode>2"
+(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
@@ -3228,7 +3228,7 @@
 
 ;; Vector versions of the floating-point frint patterns.
 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
-(define_insn "<frint_pattern><mode>2"
+(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
 		       FRINT))]
@@ -3583,7 +3583,7 @@
 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<fmaxmin><mode>3"
+(define_insn "<fmaxmin><mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
@@ -6960,7 +6960,7 @@
     DONE;
 })
 
-(define_insn "*sqrt<mode>2"
+(define_insn "*sqrt<mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 96834425d51..29a2e90e92a 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -41,6 +41,14 @@ OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
 FUNC (T, IS, OS, OP1, S)                \
 OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
 
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
 #define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
 OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
 OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
@@ -53,6 +61,8 @@ OPELEVEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, m
 OPELEVEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 OPELEVEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 
+OPEIGHT (float32, 2, 4, f32, add, sub, mul, div, max, maxnm, min, minnm)
+
 #define UNARY(OT,IT,OP,S)			\
 OT                                              \
 foo_##IT##OP##_##S (IT a)                     \
@@ -71,6 +81,7 @@ OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
 OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
 OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
 
+OPTEN (float32, 2, 4, f32, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
new file mode 100644
index 00000000000..603c5ab1439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
@@ -0,0 +1,72 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+fp16" } */
+
+#include <arm_neon.h>
+
+#define BINARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b), zeros);      \
+}
+
+#define FUNC(T,IS,OS,OP,S) BINARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3)    \
+FUNC (T, IS, OS, OP1, S)        \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4)       \
+FUNC (T, IS, OS, OP1, S)                \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+#define OPFIVE(T,IS,OS,S,OP1,OP2,OP3,OP4, OP5)  \
+FUNC (T, IS, OS, OP1, S)                \
+OPFOUR (T, IS, OS, S, OP2, OP3, OP4, OP5)
+
+#define OPSIX(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
+
+#define OPSEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
+
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
+#define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
+
+OPEIGHT (float16, 4, 8, f16, add, sub, mul, div, max, maxnm, min, minnm)
+
+#define UNARY(OT,IT,OP,S)			\
+OT                                              \
+foo_##IT##OP##_##S (IT a)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S ((IT) v##OP##_##S (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTEN (float16, 4, 8, f16, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-03 10:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-03 10:16 [gcc r14-422] aarch64: PR target/99195 annotate simple floating-point patterns for vec-concat with zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).