public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com
Subject: [PATCH 3/4][AArch32]: Add support for sign differing dot-product usdot for NEON.
Date: Wed, 5 May 2021 18:39:25 +0100	[thread overview]
Message-ID: <20210505173923.GA20719@arm.com> (raw)
In-Reply-To: <patch-14433-tamar@arm.com>

[-- Attachment #1: Type: text/plain, Size: 4164 bytes --]

Hi All,

This adds optabs implementing usdot_prod.

The following testcase:

#define N 480
#define SIGNEDNESS_1 unsigned
#define SIGNEDNESS_2 signed
#define SIGNEDNESS_3 signed
#define SIGNEDNESS_4 unsigned

SIGNEDNESS_1 int __attribute__ ((noipa))
f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
   SIGNEDNESS_4 char *restrict b)
{
  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
    {
      int av = a[i];
      int bv = b[i];
      SIGNEDNESS_2 short mult = av * bv;
      res += mult;
    }
  return res;
}

Generates

f:
        vmov.i32        q8, #0  @ v4si
        add     r3, r2, #480
.L2:
        vld1.8  {q10}, [r2]!
        vld1.8  {q9}, [r1]!
        vusdot.s8       q8, q9, q10
        cmp     r3, r2
        bne     .L2
        vadd.i32        d16, d16, d17
        vpadd.i32       d16, d16, d16
        vmov.32 r3, d16[0]
        add     r0, r0, r3
        bx      lr

instead of

f:
        vmov.i32        q8, #0  @ v4si
        add     r3, r2, #480
.L2:
        vld1.8  {q9}, [r2]!
        vld1.8  {q11}, [r1]!
        cmp     r3, r2
        vmull.s8 q10, d18, d22
        vmull.s8 q9, d19, d23
        vaddw.s16       q8, q8, d20
        vaddw.s16       q8, q8, d21
        vaddw.s16       q8, q8, d18
        vaddw.s16       q8, q8, d19
        bne     .L2
        vadd.i32        d16, d16, d17
        vpadd.i32       d16, d16, d16
        vmov.32 r3, d16[0]
        add     r0, r0, r3
        bx      lr

For NEON.  I couldn't figure out if the MVE instruction vmlaldav.s16 could be
used to emulate this.  Because it would require additional widening to work I
left MVE out of this patch set but perhaps someone should take a look.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/arm/neon.md (usdot_prod<vsi2qi>): New.

gcc/testsuite/ChangeLog:

	* gcc.target/arm/simd/vusdot-autovec.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>"
   DONE;
 })
 
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand")
+	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
+							"register_operand")
+				   (match_operand:<VSI2QI> 2
+							"register_operand")]
+		     UNSPEC_DOT_US)
+		    (match_operand:VCVTI 3 "register_operand")))]
+  "TARGET_I8MM"
+{
+  emit_insn (
+    gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1],
+			    operands[2]));
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  DONE;
+})
+
 (define_expand "neon_copysignf<mode>"
   [(match_operand:VCVTF 0 "register_operand")
    (match_operand:VCVTF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab372d6fbaad6b3813
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */


-- 

[-- Attachment #2: rb14435.patch --]
[-- Type: text/x-diff, Size: 2232 bytes --]

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>"
   DONE;
 })
 
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand")
+	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
+							"register_operand")
+				   (match_operand:<VSI2QI> 2
+							"register_operand")]
+		     UNSPEC_DOT_US)
+		    (match_operand:VCVTI 3 "register_operand")))]
+  "TARGET_I8MM"
+{
+  emit_insn (
+    gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1],
+			    operands[2]));
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  DONE;
+})
+
 (define_expand "neon_copysignf<mode>"
   [(match_operand:VCVTF 0 "register_operand")
    (match_operand:VCVTF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab372d6fbaad6b3813
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */


  parent reply	other threads:[~2021-05-05 17:39 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-05 17:38 [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes Tamar Christina
2021-05-05 17:38 ` [PATCH 2/4]AArch64: Add support for sign differing dot-product usdot for NEON and SVE Tamar Christina
2021-05-10 16:49   ` Richard Sandiford
2021-05-25 14:57     ` Tamar Christina
2021-05-26  8:50       ` Richard Sandiford
2021-05-05 17:39 ` Tamar Christina [this message]
2021-05-05 17:42   ` FW: [PATCH 3/4][AArch32]: Add support for sign differing dot-product usdot for NEON Tamar Christina
     [not found]     ` <VI1PR08MB5325B832EE3BB6139886C0E9FF259@VI1PR08MB5325.eurprd08.prod.outlook.com>
2021-05-25 15:02       ` Tamar Christina
2021-05-26 10:45         ` Kyrylo Tkachov
2021-05-06  9:23   ` Christophe Lyon
2021-05-06  9:27     ` Tamar Christina
2021-05-05 17:39 ` [PATCH 4/4]middle-end: Add tests middle end generic tests for sign differing dotproduct Tamar Christina
     [not found]   ` <VI1PR08MB532511701573C18A33AC6291FF259@VI1PR08MB5325.eurprd08.prod.outlook.com>
2021-05-25 15:01     ` FW: " Tamar Christina
     [not found]     ` <11s2181-8856-30rq-26or-84q8o7qrr2o@fhfr.qr>
2021-05-26  8:48       ` Tamar Christina
2021-06-14 12:08       ` Tamar Christina
2021-05-07 11:45 ` [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes Richard Biener
2021-05-07 12:42   ` Tamar Christina
2021-05-10 11:39     ` Richard Biener
2021-05-10 12:58       ` Tamar Christina
2021-05-10 13:29         ` Richard Biener
2021-05-25 14:57           ` Tamar Christina
2021-05-26  8:56             ` Richard Biener
2021-06-02  9:28               ` Tamar Christina
2021-06-04 10:12                 ` Tamar Christina
2021-06-07 10:10                   ` Richard Sandiford
2021-06-14 12:06                     ` Tamar Christina
2021-06-21  8:11                       ` Tamar Christina
2021-06-22 10:56                       ` Richard Sandiford
2021-06-22 11:16                         ` Richard Sandiford
2021-07-12  9:18                           ` Tamar Christina
2021-07-12  9:39                             ` Richard Sandiford
2021-07-12  9:56                               ` Tamar Christina
2021-07-12 10:25                                 ` Richard Sandiford
2021-07-12 12:29                                   ` Tamar Christina
2021-07-12 14:55                                     ` Richard Sandiford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210505173923.GA20719@arm.com \
    --to=tamar.christina@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).