public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Improve integer bit test on atomic builtin return
@ 2021-10-04 13:53 H.J. Lu
  2021-10-05 10:07 ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2021-10-04 13:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Richard Biener

commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue May 3 13:37:25 2016 +0200

    re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

optimized the bit test on an atomic builtin's return value to use lock
bts/btr/btc.  However, it works only for unsigned integers, since atomic
builtins operate on the 'uintptr_t' type.  It fails when the result is
cast to bool:

  _1 = atomic builtin;
  _4 = (_Bool) _1;

and signed integers:

  _1 = atomic builtin;
  _2 = (int) _1;
  _5 = _2 & (1 << N);

Improve the bit test on an atomic builtin's return value by converting:

  _1 = atomic builtin;
  _4 = (_Bool) _1;

to

  _1 = atomic builtin;
  _5 = _1 & (1 << 0);
  _4 = (_Bool) _5;

and converting:

  _1 = atomic builtin;
  _2 = (int) _1;
  _5 = _2 & (1 << N);

to

  _1 = atomic builtin;
  _6 = _1 & (1 << N);
  _5 = (int) _6;

gcc/

	PR middle-end/102566
	* tree-ssa-ccp.c (optimize_atomic_bit_test_and): Handle cast
	between atomic builtin and bit test.

gcc/testsuite/

	PR middle-end/102566
	* g++.target/i386/pr102566-1.C: New test.
	* gcc.target/i386/pr102566-1a.c: Likewise.
	* gcc.target/i386/pr102566-1b.c: Likewise.
	* gcc.target/i386/pr102566-2.c: Likewise.
---
 gcc/testsuite/g++.target/i386/pr102566-1.C  |  12 ++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c | 188 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c | 107 +++++++++++
 gcc/testsuite/gcc.target/i386/pr102566-2.c  |  14 ++
 gcc/tree-ssa-ccp.c                          | 136 +++++++++++++-
 5 files changed, 452 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c

diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
new file mode 100644
index 00000000000..6e33298d8bf
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool tbit(std::atomic<int> &i)
+{
+  return i.fetch_or(1, std::memory_order_relaxed) & 1;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
new file mode 100644
index 00000000000..a915de354e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+  int t2 = t1 & mask;
+  return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
new file mode 100644
index 00000000000..c4dab8135c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+    __builtin_abort ();
+  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+    __builtin_abort ();
+  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+    __builtin_abort ();
+  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+    __builtin_abort ();
+  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (cnt != 0
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+    __builtin_abort ();
+  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || cnt != 2)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
new file mode 100644
index 00000000000..d1c30315353
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v)
+{
+  return atomic_fetch_or_explicit (v, 1, memory_order_relaxed) & 1;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 70ce6a4d5b8..a3f7b7f233e 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3279,10 +3279,115 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
-      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
       || !gimple_vdef (call))
     return;
 
+  mask = gimple_call_arg (call, 1);
+  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+  if (rhs_code != BIT_AND_EXPR)
+    {
+      if (rhs_code != NOP_EXPR)
+	return;
+
+      tree nop_lhs = gimple_assign_lhs (use_stmt);
+      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nop_lhs))
+	return;
+
+      tree nop_rhs = gimple_assign_rhs1 (use_stmt);
+
+      gimple *g;
+      gimple_stmt_iterator gsi;
+      tree var;
+
+      if (TREE_CODE (TREE_TYPE (nop_lhs)) == BOOLEAN_TYPE)
+	{
+	  /* Convert
+	     _1 = atomic bit op;
+	     _4 = (_Bool) _1;
+	     to
+	     _1 = atomic bit op;
+	     _5 = _1 & 1;
+	     _4 = (_Bool) _5;
+	   */
+	  var = make_ssa_name (TREE_TYPE (nop_rhs));
+	  replace_uses_by (nop_rhs, var);
+	  g = gimple_build_assign (var, BIT_AND_EXPR, nop_rhs,
+				   build_int_cst (TREE_TYPE (lhs), 1));
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = g;
+	}
+      else if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+	       == TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+	{
+	  gimple *use_nop_stmt;
+	  if (!single_imm_use (nop_lhs, &use_p, &use_nop_stmt)
+	      || !is_gimple_assign (use_nop_stmt)
+	      || gimple_assign_rhs_code (use_nop_stmt) != BIT_AND_EXPR)
+	    return;
+
+	  tree op_mask = mask;
+	  if (TREE_CODE (op_mask) == SSA_NAME)
+	    {
+	      g = SSA_NAME_DEF_STMT (op_mask);
+	      if (gimple_assign_rhs_code (g) == NOP_EXPR)
+		{
+		  tree mask_nop_lhs = gimple_assign_lhs (g);
+
+		  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
+		    return;
+
+		  tree mask_nop_rhs = gimple_assign_rhs1 (g);
+		  if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
+		      != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
+		    return;
+		  op_mask = mask_nop_rhs;
+		  g = SSA_NAME_DEF_STMT (op_mask);
+		}
+
+	      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		{
+		  if (!is_gimple_assign (g)
+		      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+		    return;
+		  tree reset_mask = gimple_assign_rhs1 (g);
+		  if (TREE_CODE (op_mask) != SSA_NAME)
+		    return;
+		  g = SSA_NAME_DEF_STMT (reset_mask);
+		}
+
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
+		  || !integer_onep (gimple_assign_rhs1 (g)))
+		return;
+	    }
+
+	  /* Convert
+	     _1 = atomic bit op;
+	     _2 = (int) _1;
+	     _5 = _2 & N;
+	     to
+	     _1 = atomic bit op;
+	     _6 = _1 & N;
+	     _5 = (int) _6;
+	   */
+	  replace_uses_by (nop_lhs, lhs);
+	  tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	  var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+	  gimple_assign_set_lhs (use_nop_stmt, var);
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_remove (&gsi, true);
+	  release_defs (use_stmt);
+	  gsi_remove (gsip, true);
+	  var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
+	  gsi = gsi_for_stmt (use_nop_stmt);
+	  g = gimple_build_assign (use_nop_lhs, var);
+	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = use_nop_stmt;
+	  mask = op_mask;
+	}
+    }
+
   switch (fn)
     {
     case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,7 +3406,6 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
     return;
 
-  mask = gimple_call_arg (call, 1);
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
     return;
@@ -3434,18 +3538,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	 of the specified bit after the atomic operation (makes only sense
 	 for xor, otherwise the bit content is compile time known),
 	 we need to invert the bit.  */
+      tree mask_convert = mask;
+      gimple *g_convert = nullptr;
+      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
+	{
+	  mask_convert = make_ssa_name (TREE_TYPE (lhs));
+	  tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
+	  g_convert = gimple_build_assign (mask_convert, var);
+	}
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
 			       BIT_XOR_EXPR, new_lhs,
 			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
-					: mask);
+					: mask_convert);
       new_lhs = gimple_assign_lhs (g);
       if (throws)
 	{
-	  gsi_insert_on_edge_immediate (e, g);
+	  if (g_convert)
+	    {
+	      gsi_insert_on_edge_immediate (e, g_convert);
+	      gsi = gsi_for_stmt (g_convert);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	    }
+	  else
+	    gsi_insert_on_edge_immediate (e, g);
 	  gsi = gsi_for_stmt (g);
 	}
       else
-	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	{
+	  if (g_convert)
+	    {
+	      gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
+	      gsi = gsi_for_stmt (g_convert);
+	    }
+	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	}
     }
   if (use_bool && has_debug_uses)
     {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-10-25  9:00 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-04 13:53 [PATCH] Improve integer bit test on atomic builtin return H.J. Lu
2021-10-05 10:07 ` Richard Biener
2021-10-05 16:40   ` H.J. Lu
2021-10-05 23:54     ` [PATCH v2] Improve integer bit test on __atomic_fetch_[or|and]_* returns H.J. Lu
2021-10-08  7:16     ` [PATCH] Improve integer bit test on atomic builtin return Richard Biener
2021-10-08 14:55       ` H.J. Lu
2021-10-22  5:48         ` [PATCH] Canonicalize __atomic/sync_fetch_or/xor/and for constant mask liuhongt
2021-10-22 13:12           ` H.J. Lu
2021-10-25  5:59             ` liuhongt
2021-10-25  9:07               ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).