[gcc r12-4922] middle-end: convert negate + right shift into compare greater.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-4922] middle-end: convert negate + right shift into compare greater.
@ 2021-11-04 17:37 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-11-04 17:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d70720c2382e687e192a9d666e80acb41bfda856

commit r12-4922-gd70720c2382e687e192a9d666e80acb41bfda856
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Thu Nov 4 17:32:09 2021 +0000

    middle-end: convert negate + right shift into compare greater.
    
    This turns a negation followed by an arithmetic right shift by the
    sign-bit position (precision - 1) into a comparison with 0, so that
    (-x) >> 31 becomes -(x > 0) for a 32-bit signed x.
    
    i.e.
    
    void fun1(int32_t *x, int n)
    {
        for (int i = 0; i < (n & -16); i++)
          x[i] = (-x[i]) >> 31;
    }
    
    now generates:
    
    .L3:
            ldr     q0, [x0]
            cmgt    v0.4s, v0.4s, #0
            str     q0, [x0], 16
            cmp     x0, x1
            bne     .L3
    
    instead of:
    
    .L3:
            ldr     q0, [x0]
            neg     v0.4s, v0.4s
            sshr    v0.4s, v0.4s, 31
            str     q0, [x0], 16
            cmp     x0, x1
            bne     .L3
    
    gcc/ChangeLog:
    
            * match.pd: New negate+shift pattern.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/signbit-2.c: New test.
            * gcc.dg/signbit-3.c: New test.
            * gcc.dg/signbit-4.c: New test.
            * gcc.dg/signbit-5.c: New test.
            * gcc.dg/signbit-6.c: New test.
            * gcc.target/aarch64/signbit-1.c: New test.

Diff:
---
 gcc/match.pd                                 | 36 +++++++++++++-
 gcc/testsuite/gcc.dg/signbit-2.c             | 19 ++++++++
 gcc/testsuite/gcc.dg/signbit-3.c             | 13 +++++
 gcc/testsuite/gcc.dg/signbit-4.c             | 65 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/signbit-5.c             | 65 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/signbit-6.c             | 72 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/signbit-1.c | 20 ++++++++
 7 files changed, 289 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 651982c28fe..917833dd14b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
    integer_pow2p
    uniform_integer_cst_p
    HONOR_NANS
-   uniform_vector_p)
+   uniform_vector_p
+   expand_vec_cmp_expr_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -831,6 +832,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     { tree utype = unsigned_type_for (type); }
     (convert (rshift (lshift (convert:utype @0) @2) @3))))))
 
+/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!TYPE_UNSIGNED (type)
+        && TYPE_OVERFLOW_UNDEFINED (type))
+    (with { tree stype = TREE_TYPE (@1);
+	    tree bt = truth_type_for (type);
+	    tree zeros = build_zero_cst (type);
+	    tree cst = NULL_TREE; }
+     (switch
+      /* Handle scalar case.  */
+      (if (INTEGRAL_TYPE_P (type)
+	   /* If we apply the rule to the scalar type before vectorization
+	      we will enforce the result of the comparison being a bool,
+	      which will require an extra AND on the result.  That AND is
+	      indistinguishable from the case where the user actually wanted
+	      0 or 1 as the result, so it can't be removed.  */
+	   && canonicalize_math_after_vectorization_p ()
+	   && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
+       (negate (convert (gt @0 { zeros; }))))
+      /* Handle vector case.  */
+      (if (VECTOR_INTEGER_TYPE_P (type)
+	   /* First check whether the target has the same mode for vector
+	      comparison results as its operands do.  */
+	   && TYPE_MODE (bt) == TYPE_MODE (type)
+	   /* Then check to see if the target is able to expand the comparison
+	      with the given type later on, otherwise we may ICE.  */
+	   && expand_vec_cmp_expr_p (type, bt, GT_EXPR)
+	   && (cst = uniform_integer_cst_p (@1)) != NULL
+	   && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
+       (view_convert (gt:bt @0 { zeros; }))))))))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index 00000000000..fc0157cbc5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index 00000000000..19e9c06c349
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-4.c b/gcc/testsuite/gcc.dg/signbit-4.c
new file mode 100644
index 00000000000..bc459ba60a7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-4.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fwrapv" } */
+
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE int32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+__attribute__ ((noinline, noipa))
+void fun1(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+__attribute__ ((noinline, noipa, optimize("O0")))
+void fun2(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  a[0] = INT_MIN;
+  b[0] = INT_MIN;
+
+  for (int i = 1; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c
new file mode 100644
index 00000000000..22a92704773
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-5.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE int32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+__attribute__ ((noinline, noipa))
+void fun1(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+__attribute__ ((noinline, noipa, optimize("O1")))
+void fun2(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  a[0] = INT_MIN;
+  b[0] = INT_MIN;
+
+  for (int i = 1; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/signbit-6.c b/gcc/testsuite/gcc.dg/signbit-6.c
new file mode 100644
index 00000000000..da186624cfa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-6.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE int32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+__attribute__ ((noinline, noipa))
+void fun1(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+__attribute__ ((noinline, noipa, optimize("O0")))
+void fun2(TYPE *x, int n)
+{
+    for (int i = 0; i < n; i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  a[0] = INT_MIN;
+  b[0] = INT_MIN;
+
+  for (int i = 1; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  if (DEBUG)
+    printf ("%d = 0x%x == 0x%x\n", 0, a[0], b[0]);
+
+  if (a[0] != 0x0 || b[0] != -1)
+        __builtin_abort ();
+
+
+  for (int i = 1; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index 00000000000..a5654d139cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+#pragma GCC target "+nosve"
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-04 17:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-04 17:37 [gcc r12-4922] middle-end: convert negate + right shift into compare greater Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).