public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-4577] AArch64: Add pattern for sshr to cmlt
@ 2021-10-20 16:09 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-10-20 16:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ea464fd2d4fc093fe723466e3d14524a967cefdc

commit r12-4577-gea464fd2d4fc093fe723466e3d14524a967cefdc
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Oct 20 17:09:00 2021 +0100

    AArch64: Add pattern for sshr to cmlt
    
    This optimizes a signed right shift by BITSIZE-1 into a cmlt operation, which is
    preferable because compares generally have a higher throughput than shifts.
    
    On AArch64 the result of the shift would have been either -1 or 0, which is
    exactly the result of the compare against zero.
    
    e.g.
    
    void e (int * restrict a, int *b, int n)
    {
        for (int i = 0; i < n; i++)
          b[i] = a[i] >> 31;
    }
    
    now generates:
    
    .L4:
            ldr     q0, [x0, x3]
            cmlt    v0.4s, v0.4s, #0
            str     q0, [x1, x3]
            add     x3, x3, 16
            cmp     x4, x3
            bne     .L4
    
    instead of:
    
    .L4:
            ldr     q0, [x0, x3]
            sshr    v0.4s, v0.4s, 31
            str     q0, [x1, x3]
            add     x3, x3, 16
            cmp     x4, x3
            bne     .L4
    
    Thanks,
    Tamar
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md (aarch64_simd_ashr<mode>): Add cmlt
            case.
            * config/aarch64/constraints.md (D1): New.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/shl-combine-2.c: New test.
            * gcc.target/aarch64/shl-combine-3.c: New test.
            * gcc.target/aarch64/shl-combine-4.c: New test.
            * gcc.target/aarch64/shl-combine-5.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md               | 12 +++++++-----
 gcc/config/aarch64/constraints.md                |  8 ++++++++
 gcc/testsuite/gcc.target/aarch64/shl-combine-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/aarch64/shl-combine-3.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/aarch64/shl-combine-4.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/aarch64/shl-combine-5.c | 14 ++++++++++++++
 6 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7f0888ee2f8..0b340b49fa0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1127,12 +1127,14 @@
 )
 
 (define_insn "aarch64_simd_ashr<mode>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
+		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "D1,Dr")))]
  "TARGET_SIMD"
- "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm<q>")]
+ "@
+  cmlt\t%0.<Vtype>, %1.<Vtype>, #0
+  sshr\t%0.<Vtype>, %1.<Vtype>, %2"
+  [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
 )
 
 (define_insn "*aarch64_simd_sra<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3b49b452119..18630815ffc 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -437,6 +437,14 @@
       (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op),
 						 true)")))
 
+(define_constraint "D1"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op,
+			GET_MODE_UNIT_BITSIZE (mode) - 1,
+			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
new file mode 100644
index 00000000000..6a0331fbe60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE char
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c
new file mode 100644
index 00000000000..2086b24a3cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE short
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c
new file mode 100644
index 00000000000..083181071f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE int
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c
new file mode 100644
index 00000000000..6b2a6bd86b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE long
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-10-20 16:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-20 16:09 [gcc r12-4577] AArch64: Add pattern for sshr to cmlt Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).