[gcc r12-2299] AArch32: Add support for sign differing dot-product usdot for NEON.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-2299] AArch32: Add support for sign differing dot-product usdot for NEON.
@ 2021-07-14 14:44 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-07-14 14:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6412c58c781f64b60e7353e762cd5cec62a863e7

commit r12-2299-g6412c58c781f64b60e7353e762cd5cec62a863e7
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Jul 14 15:20:45 2021 +0100

    AArch32: Add support for sign differing dot-product usdot for NEON.
    
    This adds optabs implementing usdot_prod.
    
    The following testcase:
    
    #define N 480
    #define SIGNEDNESS_1 unsigned
    #define SIGNEDNESS_2 signed
    #define SIGNEDNESS_3 signed
    #define SIGNEDNESS_4 unsigned
    
    SIGNEDNESS_1 int __attribute__ ((noipa))
    f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
       SIGNEDNESS_4 char *restrict b)
    {
      for (__INTPTR_TYPE__ i = 0; i < N; ++i)
        {
          int av = a[i];
          int bv = b[i];
          SIGNEDNESS_2 short mult = av * bv;
          res += mult;
        }
      return res;
    }
    
    Generates
    
    f:
            vmov.i32        q8, #0  @ v4si
            add     r3, r2, #480
    .L2:
            vld1.8  {q10}, [r2]!
            vld1.8  {q9}, [r1]!
            vusdot.s8       q8, q9, q10
            cmp     r3, r2
            bne     .L2
            vadd.i32        d16, d16, d17
            vpadd.i32       d16, d16, d16
            vmov.32 r3, d16[0]
            add     r0, r0, r3
            bx      lr
    
    instead of
    
    f:
            vmov.i32        q8, #0  @ v4si
            add     r3, r2, #480
    .L2:
            vld1.8  {q9}, [r2]!
            vld1.8  {q11}, [r1]!
            cmp     r3, r2
            vmull.s8 q10, d18, d22
            vmull.s8 q9, d19, d23
            vaddw.s16       q8, q8, d20
            vaddw.s16       q8, q8, d21
            vaddw.s16       q8, q8, d18
            vaddw.s16       q8, q8, d19
            bne     .L2
            vadd.i32        d16, d16, d17
            vpadd.i32       d16, d16, d16
            vmov.32 r3, d16[0]
            add     r0, r0, r3
            bx      lr
    
    For NEON.  I couldn't figure out if the MVE instruction vmlaldav.s16 could be
    used to emulate this.  Because it would require additional widening to work, I
    left MVE out of this patch set, but perhaps someone should take a look.
    
    gcc/ChangeLog:
    
            * config/arm/neon.md (usdot_prod<vsi2qi>): New.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/arm/simd/vusdot-autovec.c: New test.

Diff:
---
 gcc/config/arm/neon.md                             | 12 +++++++
 gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c | 38 ++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 64365e0a909..8b0a396947c 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2969,6 +2969,18 @@
   DONE;
 })
 
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand")
+	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
+							"register_operand")
+				   (match_operand:<VSI2QI> 2
+							"register_operand")]
+		     UNSPEC_DOT_US)
+		    (match_operand:VCVTI 3 "register_operand")))]
+  "TARGET_I8MM"
+)
+
 (define_expand "neon_copysignf<mode>"
   [(match_operand:VCVTF 0 "register_operand")
    (match_operand:VCVTF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
new file mode 100644
index 00000000000..7cc56f68817
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-07-14 14:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-14 14:44 [gcc r12-2299] AArch32: Add support for sign differing dot-product usdot for NEON Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).