public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-6160] AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition.
@ 2020-12-16 20:47 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2020-12-16 20:47 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:84747acf8da36425f7e36cf99b251ee047b2e3a5

commit r11-6160-g84747acf8da36425f7e36cf99b251ee047b2e3a5
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Dec 16 20:43:47 2020 +0000

    AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition.
    
    This adds implementations of the optabs for complex addition.  With this
    change, the following C code:
    
      void f90 (float complex a[restrict N], float complex b[restrict N],
                float complex c[restrict N])
      {
        for (int i=0; i < N; i++)
          c[i] = a[i] + (b[i] * I);
      }
    
    generates
    
      f90:
              mov     x3, 0
              .p2align 3,,7
      .L2:
              ldr     q0, [x0, x3]
              ldr     q1, [x1, x3]
              fcadd   v0.4s, v0.4s, v1.4s, #90
              str     q0, [x2, x3]
              add     x3, x3, 16
              cmp     x3, 1600
              bne     .L2
              ret
    
    instead of
    
      f90:
              add     x3, x1, 1600
              .p2align 3,,7
      .L2:
              ld2     {v4.4s - v5.4s}, [x0], 32
              ld2     {v2.4s - v3.4s}, [x1], 32
              fsub    v0.4s, v4.4s, v3.4s
              fadd    v1.4s, v5.4s, v2.4s
              st2     {v0.4s - v1.4s}, [x2], 32
              cmp     x3, x1
              bne     .L2
              ret
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New.
            * config/aarch64/iterators.md (SVE2_INT_CADD_OP): New.
            * config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New.
            * config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md |  8 ++++++++
 gcc/config/aarch64/aarch64-sve.md  | 14 ++++++++++++++
 gcc/config/aarch64/aarch64-sve2.md | 10 ++++++++++
 gcc/config/aarch64/iterators.md    |  4 ++++
 4 files changed, 36 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 68baf416045..05d18f8bd3a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -449,6 +449,14 @@
   [(set_attr "type" "neon_fcadd")]
 )
 
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:VHSDF 0 "register_operand")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+		       (match_operand:VHSDF 2 "register_operand")]
+		       FCADD))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+)
+
 (define_insn "aarch64_fcmla<rot><mode>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 6359c40bdec..6a5194f54f9 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5480,6 +5480,20 @@
   "TARGET_SVE"
 )
 
+;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
+(define_expand "@cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
+	   (match_operand:SVE_FULL_F 1 "register_operand")
+	   (match_operand:SVE_FULL_F 2 "register_operand")]
+	  SVE_COND_FCADD))]
+  "TARGET_SVE"
+{
+  operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+})
+
 ;; Predicated FCADD, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 772c35079c9..1897ddf69c3 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1799,6 +1799,16 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; unpredicated optab pattern for auto-vectorizer
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(unspec:SVE_FULL_I
+	  [(match_operand:SVE_FULL_I 1 "register_operand")
+	   (match_operand:SVE_FULL_I 2 "register_operand")]
+	  SVE2_INT_CADD_OP))]
+  "TARGET_SVE2"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Complex ternary operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fb1426b7752..b8ee4220603 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2598,6 +2598,10 @@
 				    UNSPEC_SQRDCMLAH180
 				    UNSPEC_SQRDCMLAH270])
 
+;; Same as SVE2_INT_CADD but exclude the saturating instructions
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+				       UNSPEC_CADD270])
+
 (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
 				    UNSPEC_CDOT90
 				    UNSPEC_CDOT180


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-12-16 20:47 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-16 20:47 [gcc r11-6160] AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition Tamar Christina

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).