public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5486] Enhance optimize_atomic_bit_test_and to handle truncation.
@ 2021-11-24  1:01 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-11-24  1:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7df89377a7ae3906255e38a79be8e5d962c3a0df

commit r12-5486-g7df89377a7ae3906255e38a79be8e5d962c3a0df
Author: liuhongt <hongtao.liu@intel.com>
Date:   Tue Nov 16 13:36:36 2021 +0800

    Enhance optimize_atomic_bit_test_and to handle truncation.
    
    r12-5102-gfb161782545224f5 improves integer bit test on
    __atomic_fetch_[or|and]_* returns only for nop_convert, i.e.
    
    transform
    
      mask_5 = 1 << bit_4(D);
      mask.0_1 = (unsigned int) mask_5;
      _2 = __atomic_fetch_or_4 (a_7(D), mask.0_1, 0);
      t1_9 = (int) _2;
      t2_10 = mask_5 & t1_9;
    
    to
    
      mask_5 = 1 << n_4(D);
      mask.1_1 = (unsigned int) mask_5;
      _11 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_1_4, n_4(D), 0);
      _8 = (int) _11;
    
    And this patch extends the original patch to handle truncation,
    i.e.
    
    transform
    
      long int mask;
      mask_8 = 1 << n_7(D);
      mask.0_1 = (long unsigned int) mask_8;
      _2 = __sync_fetch_and_or_8 (&pscc_a_2_3, mask.0_1);
      _3 = (unsigned int) _2;
      _4 = (unsigned int) mask_8;
      _5 = _3 & _4;
      _6 = (int) _5;
    
    to
    
      long int mask;
      mask_8 = 1 << n_7(D);
      mask.0_1 = (long unsigned int) mask_8;
      _14 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_2_3, n_7(D), 0);
      _5 = (unsigned int) _14;
      _6 = (int) _5;
    
    2021-11-17  Hongtao Liu  <hongtao.liu@intel.com>
                H.J. Lu  <hongjiu.lu@intel.com>
    
    gcc/ChangeLog:
    
            PR tree-optimization/103194
            * match.pd (gimple_nop_atomic_bit_test_and_p): Extended to
            match truncation.
            * tree-ssa-ccp.c (gimple_nop_convert): Declare.
            (optimize_atomic_bit_test_and): Enhance
            optimize_atomic_bit_test_and to handle truncation.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr103194-2.c: New test.
            * gcc.target/i386/pr103194-3.c: New test.
            * gcc.target/i386/pr103194-4.c: New test.
            * gcc.target/i386/pr103194-5.c: New test.
            * gcc.target/i386/pr103194.c: New test.

Diff:
---
 gcc/match.pd                               | 48 +++++++++------
 gcc/testsuite/gcc.target/i386/pr103194-2.c | 64 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-3.c | 64 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-4.c | 61 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-5.c | 61 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194.c   | 16 +++++
 gcc/tree-ssa-ccp.c                         | 99 +++++++++++++++---------------
 7 files changed, 345 insertions(+), 68 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 886f807ac04..60b4ad5f706 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4044,39 +4044,43 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 #if GIMPLE
 (match (nop_atomic_bit_test_and_p @0 @1 @4)
- (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+ (bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
 	   INTEGER_CST@1)
  (with {
 	 int ibit = tree_log2 (@0);
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+ (bit_and (convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
 	  INTEGER_CST@1)
  (with {
 	 int ibit = tree_log2 (@0);
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+ (bit_and@4 (convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
 	    INTEGER_CST@1)
  (with {
 	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4084,11 +4088,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and@4
-  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+  (convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
   INTEGER_CST@1)
  (with {
 	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4096,19 +4101,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
-   @0))
+  (convert1?@3
+   (ATOMIC_FETCH_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7))) @5))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
-   @0))
+  (convert1?@3
+   (SYNC_FETCH_AND_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7)))))
+  (convert2? @0))
+  (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 #endif
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-2.c b/gcc/testsuite/gcc.target/i386/pr103194-2.c
new file mode 100644
index 00000000000..1a991fe0199
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-2.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE,MASK)						\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(char, short, 0);
+FOO(char, short, 7);
+FOO(short, int, 0);
+FOO(short, int, 15);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 16 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 8 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-3.c b/gcc/testsuite/gcc.target/i386/pr103194-3.c
new file mode 100644
index 00000000000..4907598bbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-3.c
@@ -0,0 +1,64 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(RTYPE, TYPE,MASK)						\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+
+FOO(int, int64, 1);
+FOO(int, int64, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 4 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-4.c b/gcc/testsuite/gcc.target/i386/pr103194-4.c
new file mode 100644
index 00000000000..8573016c5d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-4.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)							\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(short, int);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c b/gcc/testsuite/gcc.target/i386/pr103194-5.c
new file mode 100644
index 00000000000..dfaddf0aa6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-5.c
@@ -0,0 +1,61 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)							\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(int, long);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194.c b/gcc/testsuite/gcc.target/i386/pr103194.c
new file mode 100644
index 00000000000..a6d84332e4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+long pscc_a_2_3;
+int pscc_a_1_4;
+void pscc()
+{
+  pscc_a_1_4 = __sync_fetch_and_and(&pscc_a_2_3, 1);
+}
+
+static int si;
+long
+test_types (long n)
+{
+  unsigned int u2 = __atomic_fetch_xor (&si, 0, 5);
+  return u2;
+}
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 18d57729d8a..9e12da8f011 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3326,6 +3326,7 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
  */
 extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
 					      tree (*) (tree));
+extern bool gimple_nop_convert (tree, tree*, tree (*) (tree));
 
 /* Optimize
      mask_2 = 1 << cnt_1;
@@ -3462,16 +3463,16 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	  ibit = 0;
 	}
       else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
-	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	       <= TYPE_PRECISION (TREE_TYPE (use_rhs)))
 	{
 	  gimple *use_nop_stmt;
 	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
 	      || !is_gimple_assign (use_nop_stmt))
 	    return;
+	  tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
 	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
 	  if (rhs_code != BIT_AND_EXPR)
 	    {
-	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
 	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
 		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
 		return;
@@ -3584,24 +3585,23 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	    }
 	  else
 	    {
-	      tree and_expr = gimple_assign_lhs (use_nop_stmt);
 	      tree match_op[3];
 	      gimple *g;
-	      if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+	      if (!gimple_nop_atomic_bit_test_and_p (use_nop_lhs,
 						     &match_op[0], NULL)
 		  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
 		  || !single_imm_use (match_op[2], &use_p, &g)
 		  || !is_gimple_assign (g))
 		return;
-	      mask = match_op[1];
-	      if (TREE_CODE (mask) == INTEGER_CST)
+	      mask = match_op[0];
+	      if (TREE_CODE (match_op[1]) == INTEGER_CST)
 		{
-		  ibit = tree_log2 (mask);
+		  ibit = tree_log2 (match_op[1]);
 		  gcc_assert (ibit >= 0);
 		}
 	      else
 		{
-		  g = SSA_NAME_DEF_STMT (mask);
+		  g = SSA_NAME_DEF_STMT (match_op[1]);
 		  gcc_assert (is_gimple_assign (g));
 		  bit = gimple_assign_rhs2 (g);
 		}
@@ -3623,19 +3623,30 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
 		 _12 = _3 & mask_7;
 		 _5 = (int) _12;
-	       */
-	      replace_uses_by (use_lhs, lhs);
-	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
-	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
-	      gimple_assign_set_lhs (use_nop_stmt, var);
+
+		 and Convert
+		 _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+		 _2 = (short int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+		 _8 = _1 & mask;
+		 _5 = (short int) _8;
+	      */
+	      gimple_seq stmts = NULL;
+	      match_op[1] = gimple_convert (&stmts,
+					    TREE_TYPE (use_rhs),
+					    match_op[1]);
+	      var = gimple_build (&stmts, BIT_AND_EXPR,
+				  TREE_TYPE (use_rhs), use_rhs, match_op[1]);
 	      gsi = gsi_for_stmt (use_stmt);
 	      gsi_remove (&gsi, true);
 	      release_defs (use_stmt);
-	      gsi_remove (gsip, true);
-	      g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+	      use_stmt = gimple_seq_last_stmt (stmts);
 	      gsi = gsi_for_stmt (use_nop_stmt);
-	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-	      use_stmt = use_nop_stmt;
+	      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+	      gimple_assign_set_rhs_with_ops (&gsi, CONVERT_EXPR, var);
+	      update_stmt (use_nop_stmt);
 	    }
 	}
       else
@@ -3671,55 +3682,47 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       else if (TREE_CODE (mask) == SSA_NAME)
 	{
 	  gimple *g = SSA_NAME_DEF_STMT (mask);
-	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	  tree match_op;
+	  if (gimple_nop_convert (mask, &match_op, NULL))
 	    {
-	      if (!is_gimple_assign (g)
-		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
-		return;
-	      mask = gimple_assign_rhs1 (g);
+	      mask = match_op;
 	      if (TREE_CODE (mask) != SSA_NAME)
 		return;
 	      g = SSA_NAME_DEF_STMT (mask);
 	    }
 	  if (!is_gimple_assign (g))
 	    return;
-	  rhs_code = gimple_assign_rhs_code (g);
-	  if (rhs_code != LSHIFT_EXPR)
-	    {
-	      if (rhs_code != NOP_EXPR)
-		return;
 
-	      /* Handle
-		 _1 = 1 << bit_4(D);
-		 mask_5 = (unsigned int) _1;
-		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
-		 _3 = _2 & mask_5;
-		 */
-	      tree nop_lhs = gimple_assign_lhs (g);
-	      tree nop_rhs = gimple_assign_rhs1 (g);
-	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
-		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
 		return;
-	      g = SSA_NAME_DEF_STMT (nop_rhs);
-	      if (!is_gimple_assign (g)
-		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
 		return;
+	      g = SSA_NAME_DEF_STMT (mask);
 	    }
-	  if (!integer_onep (gimple_assign_rhs1 (g)))
+
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR
+	      || !integer_onep (gimple_assign_rhs1 (g)))
 	    return;
 	  bit = gimple_assign_rhs2 (g);
 	}
       else
 	return;
 
+      tree cmp_mask;
       if (gimple_assign_rhs1 (use_stmt) == lhs)
-	{
-	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
-	    return;
-	}
-      else if (gimple_assign_rhs2 (use_stmt) != lhs
-	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
-				    mask, 0))
+	cmp_mask = gimple_assign_rhs2 (use_stmt);
+      else
+	cmp_mask = gimple_assign_rhs1 (use_stmt);
+
+      tree match_op;
+      if (gimple_nop_convert (cmp_mask, &match_op, NULL))
+	cmp_mask = match_op;
+
+      if (!operand_equal_p (cmp_mask, mask, 0))
 	return;
     }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-24  1:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-24  1:01 [gcc r12-5486] Enhance optimize_atomic_bit_test_and to handle truncation hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).