[gcc r12-2321] Revert "AArch64: Correct dot-product auto-vect optab RTL"

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-2321] Revert "AArch64: Correct dot-product auto-vect optab RTL"
@ 2021-07-15 12:17 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-07-15 12:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5402023f05e8fc28c2f1cfd7107264403b118a17

commit r12-2321-g5402023f05e8fc28c2f1cfd7107264403b118a17
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Thu Jul 15 13:16:00 2021 +0100

    Revert "AArch64: Correct dot-product auto-vect optab RTL"
    
    This reverts commit 6d1cdb27828d2ef1ae1ab0209836646a269b9610.

Diff:
---
 gcc/config/aarch64/aarch64-simd-builtins.def |  4 +-
 gcc/config/aarch64/aarch64-simd.md           | 62 +++++++++++++++++-----------
 gcc/config/aarch64/arm_neon.h                |  8 ++--
 3 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 99e7348c5d0..063f503ebd9 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -375,8 +375,8 @@
   BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
 
   /* Implemented by <sur><dotprod>_prod<dot_mode>.  */
-  BUILTIN_VB (TERNOP, sdot_prod, 10, NONE)
-  BUILTIN_VB (TERNOPU, udot_prod, 10, NONE)
+  BUILTIN_VB (TERNOP, sdot, 0, NONE)
+  BUILTIN_VB (TERNOPU, udot, 0, NONE)
   BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE)
   /* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>.  */
   BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 88fa5ba5a44..74890989cb3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -587,28 +587,8 @@
   DONE;
 })
 
-;; These expands map to the Dot Product optab the vectorizer checks for
-;; and to the intrinsics patttern.
-;; The auto-vectorizer expects a dot product builtin that also does an
-;; accumulation into the provided register.
-;; Given the following pattern
-;;
-;; for (i=0; i<len; i++) {
-;;     c = a[i] * b[i];
-;;     r += c;
-;; }
-;; return result;
-;;
-;; This can be auto-vectorized to
-;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
-;;
-;; given enough iterations.  However the vectorizer can keep unrolling the loop
-;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
-;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
-;; ...
-;;
-;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sur>dot_prod<vsi2qi>"
+;; These instructions map to the __builtins for the Dot Product operations.
+(define_insn "aarch64_<sur>dot<vsi2qi>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS (match_operand:VS 1 "register_operand" "0")
 		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
@@ -633,6 +613,41 @@
   [(set_attr "type" "neon_dot<q>")]
 )
 
+;; These expands map to the Dot Product optab the vectorizer checks for.
+;; The auto-vectorizer expects a dot product builtin that also does an
+;; accumulation into the provided register.
+;; Given the following pattern
+;;
+;; for (i=0; i<len; i++) {
+;;     c = a[i] * b[i];
+;;     r += c;
+;; }
+;; return result;
+;;
+;; This can be auto-vectorized to
+;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+;;
+;; given enough iterations.  However the vectorizer can keep unrolling the loop
+;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
+;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
+;; ...
+;;
+;; and so the vectorizer provides r, in which the result has to be accumulated.
+(define_expand "<sur>dot_prod<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand")
+	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
+			    (match_operand:<VSI2QI> 2 "register_operand")]
+		 DOTPROD)
+		(match_operand:VS 3 "register_operand")))]
+  "TARGET_DOTPROD"
+{
+  emit_insn (
+    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
+				    operands[2]));
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  DONE;
+})
+
 ;; These instructions map to the __builtins for the Dot Product
 ;; indexed operations.
 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
@@ -929,7 +944,8 @@
 	rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 	rtx abd = gen_reg_rtx (V16QImode);
 	emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
-	emit_insn (gen_udot_prodv16qi (operands[0], operands[3], abd, ones));
+	emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
+					  abd, ones));
 	DONE;
       }
     rtx reduc = gen_reg_rtx (V8HImode);
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 597f44ce106..00d76ea937a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -31767,28 +31767,28 @@ __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b)
 {
-  return __builtin_aarch64_udot_prodv8qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b)
 {
-  return __builtin_aarch64_udot_prodv16qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b)
 {
-  return __builtin_aarch64_sdot_prodv8qi (__r, __a, __b);
+  return __builtin_aarch64_sdotv8qi (__r, __a, __b);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b)
 {
-  return __builtin_aarch64_sdot_prodv16qi (__r, __a, __b);
+  return __builtin_aarch64_sdotv16qi (__r, __a, __b);
 }
 
 __extension__ extern __inline uint32x2_t


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-07-15 12:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-15 12:17 [gcc r12-2321] Revert "AArch64: Correct dot-product auto-vect optab RTL" Tamar Christina

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).