public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns
@ 2021-10-10 13:49 H.J. Lu
  2021-10-13 12:34 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2021-10-10 13:49 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Richard Biener

Changes in v4:

1. Bypass redundant check when inputs have been transformed to the
equivalent canonical form with valid bit operation.

Changes in v3:

1.  Check invalid bit operation.

commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue May 3 13:37:25 2016 +0200

    re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

into

  _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
  _5 = _4;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

into

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
  _4 = _11 != 0;

But it failed to optimize many equivalent, but slightly different cases:

1.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _4 = (_Bool) _1;
2.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _4 = (_Bool) _1;
3.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
4.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
5.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
6.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
7.
  _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
  _2 = (int) _1;
  _5 = _2 & mask;
8.
  _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
9.
  _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
10.
  _1 = 1 << bit_4(D);
  mask_5 = (unsigned int) _1;
  _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
  _3 = _2 & mask_5;
11.
  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
  _4 = (int) _3;
  _5 = _4 & mask_7;

We make

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

the canonical forms for this optimization and transform cases 1-9 to the
equivalent canonical form.  For cases 10 and 11, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4 with

  _1 = 1 << bit_4(D);
  _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
  _3 = _2 & _1;

and

  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
  _6 = _3 & mask_7;
  _5 = (int) _6;

gcc/

	PR middle-end/102566
	* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
	(optimize_atomic_bit_test_and): Transform equivalent, but slightly
	different cases to their canonical forms.

gcc/testsuite/

	PR middle-end/102566
	* g++.target/i386/pr102566-1.C: New test.
	* g++.target/i386/pr102566-2.C: Likewise.
	* g++.target/i386/pr102566-3.C: Likewise.
	* g++.target/i386/pr102566-4.C: Likewise.
	* g++.target/i386/pr102566-5a.C: Likewise.
	* g++.target/i386/pr102566-5b.C: Likewise.
	* g++.target/i386/pr102566-6a.C: Likewise.
	* g++.target/i386/pr102566-6b.C: Likewise.
	* gcc.target/i386/pr102566-1a.c: Likewise.
	* gcc.target/i386/pr102566-1b.c: Likewise.
	* gcc.target/i386/pr102566-2.c: Likewise.
	* gcc.target/i386/pr102566-3a.c: Likewise.
	* gcc.target/i386/pr102566-3b.c: Likewise.
	* gcc.target/i386/pr102566-4.c: Likewise.
	* gcc.target/i386/pr102566-5.c: Likewise.
	* gcc.target/i386/pr102566-6.c: Likewise.
	* gcc.target/i386/pr102566-7.c: Likewise.
	* gcc.target/i386/pr102566-8a.c: Likewise.
	* gcc.target/i386/pr102566-8b.c: Likewise.
	* gcc.target/i386/pr102566-9a.c: Likewise.
	* gcc.target/i386/pr102566-9b.c: Likewise.
	* gcc.target/i386/pr102566-10a.c: Likewise.
	* gcc.target/i386/pr102566-10b.c: Likewise.
	* gcc.target/i386/pr102566-11.c: Likewise.
	* gcc.target/i386/pr102566-12.c: Likewise.
---
 gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
 gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
 gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
 gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
 gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
 gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
 26 files changed, 1375 insertions(+), 37 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c

diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
new file mode 100644
index 00000000000..94a66d717cc
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
new file mode 100644
index 00000000000..4f2aea961c2
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
new file mode 100644
index 00000000000..e88921dd155
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
new file mode 100644
index 00000000000..44d1362ac2e
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
@@ -0,0 +1,29 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+tbit0 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
+}
+
+bool
+tbit30 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
+}
+
+bool
+tbit31 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
new file mode 100644
index 00000000000..f9595bee2ab
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
new file mode 100644
index 00000000000..d917b27a918
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
new file mode 100644
index 00000000000..01d495eda23
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
new file mode 100644
index 00000000000..adc11fcbf2d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
new file mode 100644
index 00000000000..1c1f86a9659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
new file mode 100644
index 00000000000..0bf39824ea6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
new file mode 100644
index 00000000000..2c8f8c4e59a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic int *v)
+{
+  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic unsigned int *v, int mask)
+{
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic unsigned int *v, int mask)
+{
+  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
new file mode 100644
index 00000000000..4603a77612c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic long *v)
+{
+  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic long *v, long mask)
+{
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic long *v, long mask)
+{
+  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "btr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
new file mode 100644
index 00000000000..a915de354e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+  int t2 = t1 & mask;
+  return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
new file mode 100644
index 00000000000..c4dab8135c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+    __builtin_abort ();
+  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+    __builtin_abort ();
+  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+    __builtin_abort ();
+  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+    __builtin_abort ();
+  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (cnt != 0
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+    __builtin_abort ();
+  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || cnt != 2)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
new file mode 100644
index 00000000000..00a7c349f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
new file mode 100644
index 00000000000..d155ed367a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
new file mode 100644
index 00000000000..2668ccf827c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  unsigned int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
new file mode 100644
index 00000000000..3dfe55ac683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
new file mode 100644
index 00000000000..6bc0ae0f320
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+foo0 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
+}
+
+bool
+foo1 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
+}
+
+bool
+foo2 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
new file mode 100644
index 00000000000..168e3db78c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
new file mode 100644
index 00000000000..392da3098e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
new file mode 100644
index 00000000000..3fa2a3ef043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
new file mode 100644
index 00000000000..38ddbdc630f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 70ce6a4d5b8..bb70b87aa5e 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
   return ret;
 }
 
+/* Convert
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _7 = ~_1;
+   _5 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _8 = _1 & 1;
+   _5 = _8 == 0;
+   and convert
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _7 = ~_1;
+   _4 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _8 = _1 & 1;
+   _4 = (_Bool) _8;
+
+   USE_STMT is the gimple statement which uses the return value of
+   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
+   MASK is the mask passed to __atomic_fetch_or_*.
+ */
+
+static gimple *
+convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
+			tree lhs, tree mask)
+{
+  tree and_mask;
+  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+    {
+      /* MASK must be ~1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+					   ~HOST_WIDE_INT_1), mask, 0))
+	return nullptr;
+      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+    }
+  else
+    {
+      /* MASK must be 1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
+	return nullptr;
+      and_mask = mask;
+    }
+
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+
+  use_operand_p use_p;
+  gimple *use_not_stmt;
+
+  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
+      || !is_gimple_assign (use_not_stmt))
+    return nullptr;
+
+  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
+    return nullptr;
+
+  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
+  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
+    return nullptr;
+
+  gimple_stmt_iterator gsi;
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  tree var = make_ssa_name (TREE_TYPE (lhs));
+  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
+  lhs = gimple_assign_lhs (use_not_stmt);
+  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
+				   build_zero_cst (TREE_TYPE (mask)));
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_remove (&gsi, true);
+  return use_stmt;
+}
+
 /* Optimize
      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
@@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   tree lhs = gimple_call_lhs (call);
   use_operand_p use_p;
   gimple *use_stmt;
-  tree mask, bit;
+  tree mask;
   optab optab;
 
   if (!flag_inline_atomics
@@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
-      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
       || !gimple_vdef (call))
     return;
 
+  tree bit = nullptr;
+
+  mask = gimple_call_arg (call, 1);
+  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+  if (rhs_code != BIT_AND_EXPR)
+    {
+      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
+	return;
+
+      tree use_lhs = gimple_assign_lhs (use_stmt);
+      if (TREE_CODE (use_lhs) == SSA_NAME
+	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+	return;
+
+      tree use_rhs = gimple_assign_rhs1 (use_stmt);
+      if (lhs != use_rhs)
+	return;
+
+      gimple *g;
+      gimple_stmt_iterator gsi;
+      tree var;
+      int ibit = -1;
+
+      if (rhs_code == BIT_NOT_EXPR)
+	{
+	  g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
+	  if (!g)
+	    return;
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
+	{
+	  tree and_mask;
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      /* MASK must be ~1.  */
+	      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+						   ~HOST_WIDE_INT_1),
+				    mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	    }
+	  else
+	    {
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	      if (!operand_equal_p (and_mask, mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	    }
+	  var = make_ssa_name (TREE_TYPE (use_rhs));
+	  replace_uses_by (use_rhs, var);
+	  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+				   and_mask);
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
+	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	{
+	  gimple *use_nop_stmt;
+	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
+	      || !is_gimple_assign (use_nop_stmt))
+	    return;
+	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
+	  if (rhs_code != BIT_AND_EXPR)
+	    {
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
+		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
+		return;
+	      if (rhs_code == BIT_NOT_EXPR)
+		{
+		  g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
+					      mask);
+		  if (!g)
+		    return;
+		  /* Convert
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		     and convert
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		   */
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  use_stmt = g;
+		  ibit = 0;
+		}
+	      else
+		{
+		  if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
+		    return;
+		  if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
+		    return;
+		  tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+		  if (use_lhs != cmp_rhs1)
+		    return;
+		  tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+		  if (!integer_zerop (cmp_rhs2))
+		    return;
+
+		  tree and_mask;
+
+		  unsigned HOST_WIDE_INT bytes
+		    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
+		  ibit = bytes * BITS_PER_UNIT - 1;
+		  unsigned HOST_WIDE_INT highest
+		    = HOST_WIDE_INT_1U << ibit;
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      /* Get the signed maximum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest - 1);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		    }
+		  else
+		    {
+		      /* Get the signed minimum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		    }
+		  var = make_ssa_name (TREE_TYPE (use_rhs));
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+					   and_mask);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+		  use_stmt = g;
+		  g = gimple_build_assign (use_nop_lhs,
+					   (rhs_code == GE_EXPR
+					    ? EQ_EXPR : NE_EXPR),
+					   var,
+					   build_zero_cst (TREE_TYPE (use_rhs)));
+		  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_remove (&gsi, true);
+		}
+	    }
+	  else
+	    {
+	      tree op_mask = mask;
+	      tree check_mask = op_mask;
+	      if (TREE_CODE (op_mask) == SSA_NAME)
+		{
+		  g = SSA_NAME_DEF_STMT (op_mask);
+		  if (!is_gimple_assign (g))
+		    return;
+		  if (gimple_assign_rhs_code (g) == NOP_EXPR)
+		    {
+		      tree mask_nop_lhs = gimple_assign_lhs (g);
+
+		      if (TREE_CODE (mask_nop_lhs) == SSA_NAME
+			  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
+			return;
+
+		      tree mask_nop_rhs = gimple_assign_rhs1 (g);
+		      if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
+			  != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
+			return;
+		      op_mask = mask_nop_rhs;
+		      check_mask = op_mask;
+		      g = SSA_NAME_DEF_STMT (op_mask);
+		      if (!is_gimple_assign (g))
+			return;
+		    }
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+			return;
+		      check_mask = gimple_assign_rhs1 (g);
+		      if (TREE_CODE (check_mask) != SSA_NAME)
+			return;
+		      g = SSA_NAME_DEF_STMT (check_mask);
+		      if (!is_gimple_assign (g))
+			return;
+		    }
+
+		  if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
+		      || !integer_onep (gimple_assign_rhs1 (g)))
+		    return;
+
+		  bit = gimple_assign_rhs2 (g);
+		}
+
+	      if (TREE_CODE (check_mask) == INTEGER_CST)
+		{
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    check_mask = const_unop (BIT_NOT_EXPR,
+					     TREE_TYPE (check_mask),
+					     check_mask);
+		  check_mask = fold_convert (TREE_TYPE (lhs),
+					     check_mask);
+		  /* Check if CHECK_MASK is a power of two.  */
+		  ibit = tree_log2 (check_mask);
+		  if (ibit < 0)
+		    return;
+		}
+
+	      tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+	      tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+	      if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
+		  && !operand_equal_p (use_nop_rhs2, check_mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _2 = (int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _6 = _1 & mask;
+		 _5 = (int) _6;
+		 and convert
+		 _1 = ~mask_7;
+		 _2 = (unsigned int) _1;
+		 _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
+		 _4 = (int) _3;
+		 _5 = _4 & mask_7;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
+		 _12 = _3 & mask_7;
+		 _5 = (int) _12;
+	       */
+	      replace_uses_by (use_lhs, lhs);
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+	      gimple_assign_set_lhs (use_nop_stmt, var);
+	      gsi = gsi_for_stmt (use_stmt);
+	      gsi_remove (&gsi, true);
+	      release_defs (use_stmt);
+	      gsi_remove (gsip, true);
+	      var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
+	      gsi = gsi_for_stmt (use_nop_stmt);
+	      g = gimple_build_assign (use_nop_lhs, var);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	      use_stmt = use_nop_stmt;
+	      mask = op_mask;
+	    }
+	}
+
+      if (!bit)
+	{
+	  if (ibit < 0)
+	    gcc_unreachable ();
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+    }
+
   switch (fn)
     {
     case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
     return;
 
-  mask = gimple_call_arg (call, 1);
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
     return;
 
-  if (TREE_CODE (mask) == INTEGER_CST)
+  if (!bit)
     {
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
-	mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
-      mask = fold_convert (TREE_TYPE (lhs), mask);
-      int ibit = tree_log2 (mask);
-      if (ibit < 0)
-	return;
-      bit = build_int_cst (TREE_TYPE (lhs), ibit);
-    }
-  else if (TREE_CODE (mask) == SSA_NAME)
-    {
-      gimple *g = SSA_NAME_DEF_STMT (mask);
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+      if (TREE_CODE (mask) == INTEGER_CST)
 	{
-	  if (!is_gimple_assign (g)
-	      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+	  mask = fold_convert (TREE_TYPE (lhs), mask);
+	  int ibit = tree_log2 (mask);
+	  if (ibit < 0)
+	    return;
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+      else if (TREE_CODE (mask) == SSA_NAME)
+	{
+	  gimple *g = SSA_NAME_DEF_STMT (mask);
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+		return;
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
+		return;
+	      g = SSA_NAME_DEF_STMT (mask);
+	    }
+	  if (!is_gimple_assign (g))
 	    return;
-	  mask = gimple_assign_rhs1 (g);
-	  if (TREE_CODE (mask) != SSA_NAME)
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR)
+	    {
+	      if (rhs_code != NOP_EXPR)
+		return;
+
+	      /* Handle
+		 _1 = 1 << bit_4(D);
+		 mask_5 = (unsigned int) _1;
+		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
+		 _3 = _2 & mask_5;
+		 */
+	      tree nop_lhs = gimple_assign_lhs (g);
+	      tree nop_rhs = gimple_assign_rhs1 (g);
+	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+		return;
+	      g = SSA_NAME_DEF_STMT (nop_rhs);
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+		return;
+	    }
+	  if (!integer_onep (gimple_assign_rhs1 (g)))
 	    return;
-	  g = SSA_NAME_DEF_STMT (mask);
+	  bit = gimple_assign_rhs2 (g);
 	}
-      if (!is_gimple_assign (g)
-	  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
-	  || !integer_onep (gimple_assign_rhs1 (g)))
+      else
 	return;
-      bit = gimple_assign_rhs2 (g);
-    }
-  else
-    return;
 
-  if (gimple_assign_rhs1 (use_stmt) == lhs)
-    {
-      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+      if (gimple_assign_rhs1 (use_stmt) == lhs)
+	{
+	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+	    return;
+	}
+      else if (gimple_assign_rhs2 (use_stmt) != lhs
+	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
+				    mask, 0))
 	return;
     }
-  else if (gimple_assign_rhs2 (use_stmt) != lhs
-	   || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
-    return;
 
   bool use_bool = true;
   bool has_debug_uses = false;
@@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	 of the specified bit after the atomic operation (makes only sense
 	 for xor, otherwise the bit content is compile time known),
 	 we need to invert the bit.  */
+      tree mask_convert = mask;
+      gimple *g_convert = nullptr;
+      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
+	{
+	  mask_convert = make_ssa_name (TREE_TYPE (lhs));
+	  tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
+	  g_convert = gimple_build_assign (mask_convert, var);
+	}
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
 			       BIT_XOR_EXPR, new_lhs,
 			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
-					: mask);
+					: mask_convert);
       new_lhs = gimple_assign_lhs (g);
       if (throws)
 	{
-	  gsi_insert_on_edge_immediate (e, g);
+	  if (g_convert)
+	    {
+	      gsi_insert_on_edge_immediate (e, g_convert);
+	      gsi = gsi_for_stmt (g_convert);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	    }
+	  else
+	    gsi_insert_on_edge_immediate (e, g);
 	  gsi = gsi_for_stmt (g);
 	}
       else
-	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	{
+	  if (g_convert)
+	    {
+	      gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
+	      gsi = gsi_for_stmt (g_convert);
+	    }
+	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	}
     }
   if (use_bool && has_debug_uses)
     {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-10-10 13:49 [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns H.J. Lu
@ 2021-10-13 12:34 ` Richard Biener
  2021-10-21 11:15   ` Hongtao Liu
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-10-13 12:34 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches, Jakub Jelinek

On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Changes in v4:
>
> 1. Bypass redundant check when inputs have been transformed to the
> equivalent canonical form with valid bit operation.
>
> Changes in v3:
>
> 1.  Check invalid bit operation.
>
> commit adedd5c173388ae505470df152b9cb3947339566
> Author: Jakub Jelinek <jakub@redhat.com>
> Date:   Tue May 3 13:37:25 2016 +0200
>
>     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
>
> optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> with lock bts/btr/btc by turning
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> into
>
>   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
>   _5 = _4;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> into
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
>   _4 = _11 != 0;
>
> But it failed to optimize many equivalent, but slightly different cases:
>
> 1.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _4 = (_Bool) _1;
> 2.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _4 = (_Bool) _1;
> 3.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 4.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 5.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 6.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 7.
>   _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
>   _2 = (int) _1;
>   _5 = _2 & mask;
> 8.
>   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 9.
>   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 10.
>   _1 = 1 << bit_4(D);
>   mask_5 = (unsigned int) _1;
>   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
>   _3 = _2 & mask_5;
> 11.
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
>   _4 = (int) _3;
>   _5 = _4 & mask_7;
>
> We make
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> the canonical forms for this optimization and transform cases 1-9 to the
> equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> before __atomic_fetch_or_4/__atomic_fetch_and_4 with
>
>   _1 = 1 << bit_4(D);
>   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
>   _3 = _2 & _1;
>
> and
>
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
>   _6 = _3 & mask_7;
>   _5 = (int) _6;
>
> gcc/
>
>         PR middle-end/102566
>         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
>         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
>         different cases to their canonical forms.
>
> gcc/testsuite/
>
>         PR middle-end/102566
>         * g++.target/i386/pr102566-1.C: New test.
>         * g++.target/i386/pr102566-2.C: Likewise.
>         * g++.target/i386/pr102566-3.C: Likewise.
>         * g++.target/i386/pr102566-4.C: Likewise.
>         * g++.target/i386/pr102566-5a.C: Likewise.
>         * g++.target/i386/pr102566-5b.C: Likewise.
>         * g++.target/i386/pr102566-6a.C: Likewise.
>         * g++.target/i386/pr102566-6b.C: Likewise.
>         * gcc.target/i386/pr102566-1a.c: Likewise.
>         * gcc.target/i386/pr102566-1b.c: Likewise.
>         * gcc.target/i386/pr102566-2.c: Likewise.
>         * gcc.target/i386/pr102566-3a.c: Likewise.
>         * gcc.target/i386/pr102566-3b.c: Likewise.
>         * gcc.target/i386/pr102566-4.c: Likewise.
>         * gcc.target/i386/pr102566-5.c: Likewise.
>         * gcc.target/i386/pr102566-6.c: Likewise.
>         * gcc.target/i386/pr102566-7.c: Likewise.
>         * gcc.target/i386/pr102566-8a.c: Likewise.
>         * gcc.target/i386/pr102566-8b.c: Likewise.
>         * gcc.target/i386/pr102566-9a.c: Likewise.
>         * gcc.target/i386/pr102566-9b.c: Likewise.
>         * gcc.target/i386/pr102566-10a.c: Likewise.
>         * gcc.target/i386/pr102566-10b.c: Likewise.
>         * gcc.target/i386/pr102566-11.c: Likewise.
>         * gcc.target/i386/pr102566-12.c: Likewise.
> ---
>  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
>  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
>  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
>  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
>  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
>  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
>  26 files changed, 1375 insertions(+), 37 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
>
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..94a66d717cc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> new file mode 100644
> index 00000000000..4f2aea961c2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> new file mode 100644
> index 00000000000..e88921dd155
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> new file mode 100644
> index 00000000000..44d1362ac2e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +tbit0 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +tbit30 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +tbit31 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> new file mode 100644
> index 00000000000..f9595bee2ab
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> new file mode 100644
> index 00000000000..d917b27a918
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> new file mode 100644
> index 00000000000..01d495eda23
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> new file mode 100644
> index 00000000000..adc11fcbf2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> new file mode 100644
> index 00000000000..1c1f86a9659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> new file mode 100644
> index 00000000000..0bf39824ea6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> new file mode 100644
> index 00000000000..2c8f8c4e59a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic int *v)
> +{
> +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic unsigned int *v, int mask)
> +{
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic unsigned int *v, int mask)
> +{
> +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> new file mode 100644
> index 00000000000..4603a77612c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic long *v)
> +{
> +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic long *v, long mask)
> +{
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic long *v, long mask)
> +{
> +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "btr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> +  int t2 = t1 & mask;
> +  return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> +  cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> +    __builtin_abort ();
> +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> +    __builtin_abort ();
> +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> +    __builtin_abort ();
> +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (cnt != 0
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> +    __builtin_abort ();
> +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || cnt != 2)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..00a7c349f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> new file mode 100644
> index 00000000000..d155ed367a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> new file mode 100644
> index 00000000000..2668ccf827c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  unsigned int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> new file mode 100644
> index 00000000000..3dfe55ac683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> new file mode 100644
> index 00000000000..6bc0ae0f320
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +foo0 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +foo1 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +foo2 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> new file mode 100644
> index 00000000000..168e3db78c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> new file mode 100644
> index 00000000000..392da3098e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> new file mode 100644
> index 00000000000..3fa2a3ef043
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> new file mode 100644
> index 00000000000..38ddbdc630f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 70ce6a4d5b8..bb70b87aa5e 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
>    return ret;
>  }
>
> +/* Convert
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _7 = ~_1;
> +   _5 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _8 = _1 & 1;
> +   _5 = _8 == 0;
> +   and convert
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _7 = ~_1;
> +   _4 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _8 = _1 & 1;
> +   _4 = (_Bool) _8;
> +
> +   USE_STMT is the gimple statement which uses the return value of
> +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> +   MASK is the mask passed to __atomic_fetch_or_*.
> + */
> +
> +static gimple *
> +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> +                       tree lhs, tree mask)
> +{
> +  tree and_mask;
> +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +    {
> +      /* MASK must be ~1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                          ~HOST_WIDE_INT_1), mask, 0))
> +       return nullptr;
> +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +    }
> +  else
> +    {
> +      /* MASK must be 1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> +       return nullptr;
> +      and_mask = mask;
> +    }
> +
> +  tree use_lhs = gimple_assign_lhs (use_stmt);
> +
> +  use_operand_p use_p;
> +  gimple *use_not_stmt;
> +
> +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> +      || !is_gimple_assign (use_not_stmt))
> +    return nullptr;
> +
> +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> +    return nullptr;
> +
> +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> +    return nullptr;
> +
> +  gimple_stmt_iterator gsi;
> +  gsi = gsi_for_stmt (use_stmt);
> +  gsi_remove (&gsi, true);
> +  tree var = make_ssa_name (TREE_TYPE (lhs));
> +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> +  lhs = gimple_assign_lhs (use_not_stmt);
> +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> +                                  build_zero_cst (TREE_TYPE (mask)));
> +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_remove (&gsi, true);
> +  return use_stmt;
> +}
> +
>  /* Optimize
>       mask_2 = 1 << cnt_1;
>       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    tree lhs = gimple_call_lhs (call);
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  tree mask, bit;
> +  tree mask;
>    optab optab;
>
>    if (!flag_inline_atomics
> @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
>        || !single_imm_use (lhs, &use_p, &use_stmt)
>        || !is_gimple_assign (use_stmt)
> -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
>        || !gimple_vdef (call))
>      return;
>
> +  tree bit = nullptr;
> +
> +  mask = gimple_call_arg (call, 1);
> +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> +  if (rhs_code != BIT_AND_EXPR)
> +    {
> +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> +       return;
> +
> +      tree use_lhs = gimple_assign_lhs (use_stmt);
> +      if (TREE_CODE (use_lhs) == SSA_NAME
> +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> +       return;
> +
> +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> +      if (lhs != use_rhs)
> +       return;
> +
> +      gimple *g;
> +      gimple_stmt_iterator gsi;
> +      tree var;
> +      int ibit = -1;
> +
> +      if (rhs_code == BIT_NOT_EXPR)
> +       {
> +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> +         if (!g)
> +           return;
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> +       {
> +         tree and_mask;
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             /* MASK must be ~1.  */
> +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                                  ~HOST_WIDE_INT_1),
> +                                   mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +           }
> +         else
> +           {
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +             if (!operand_equal_p (and_mask, mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +           }
> +         var = make_ssa_name (TREE_TYPE (use_rhs));
> +         replace_uses_by (use_rhs, var);
> +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                  and_mask);
> +         gsi = gsi_for_stmt (use_stmt);
> +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> +       {
> +         gimple *use_nop_stmt;
> +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> +             || !is_gimple_assign (use_nop_stmt))
> +           return;
> +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> +         if (rhs_code != BIT_AND_EXPR)
> +           {
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> +               return;
> +             if (rhs_code == BIT_NOT_EXPR)
> +               {
> +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> +                                             mask);
> +                 if (!g)
> +                   return;
> +                 /* Convert
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_or_4 (ptr_6, ~1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                    and convert
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_and_4 (ptr_6, 1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                  */
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 use_stmt = g;
> +                 ibit = 0;
> +               }
> +             else
> +               {
> +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> +                   return;
> +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> +                   return;
> +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +                 if (use_lhs != cmp_rhs1)
> +                   return;
> +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +                 if (!integer_zerop (cmp_rhs2))
> +                   return;
> +
> +                 tree and_mask;
> +
> +                 unsigned HOST_WIDE_INT bytes
> +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> +                 ibit = bytes * BITS_PER_UNIT - 1;
> +                 unsigned HOST_WIDE_INT highest
> +                   = HOST_WIDE_INT_1U << ibit;
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     /* Get the signed maximum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest - 1);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                   }
> +                 else
> +                   {
> +                     /* Get the signed minimum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                   }
> +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                          and_mask);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +                 use_stmt = g;
> +                 g = gimple_build_assign (use_nop_lhs,
> +                                          (rhs_code == GE_EXPR
> +                                           ? EQ_EXPR : NE_EXPR),
> +                                          var,
> +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_remove (&gsi, true);
> +               }
> +           }
> +         else
> +           {
> +             tree op_mask = mask;
> +             tree check_mask = op_mask;
> +             if (TREE_CODE (op_mask) == SSA_NAME)
> +               {
> +                 g = SSA_NAME_DEF_STMT (op_mask);
> +                 if (!is_gimple_assign (g))
> +                   return;
> +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> +                   {
> +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> +
> +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> +                       return;
> +
> +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> +                       return;
> +                     op_mask = mask_nop_rhs;
> +                     check_mask = op_mask;
> +                     g = SSA_NAME_DEF_STMT (op_mask);
> +                     if (!is_gimple_assign (g))
> +                       return;
> +                   }
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +                       return;
> +                     check_mask = gimple_assign_rhs1 (g);
> +                     if (TREE_CODE (check_mask) != SSA_NAME)
> +                       return;
> +                     g = SSA_NAME_DEF_STMT (check_mask);
> +                     if (!is_gimple_assign (g))
> +                       return;
> +                   }
> +
> +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> +                     || !integer_onep (gimple_assign_rhs1 (g)))
> +                   return;
> +
> +                 bit = gimple_assign_rhs2 (g);
> +               }
> +
> +             if (TREE_CODE (check_mask) == INTEGER_CST)
> +               {
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   check_mask = const_unop (BIT_NOT_EXPR,
> +                                            TREE_TYPE (check_mask),
> +                                            check_mask);
> +                 check_mask = fold_convert (TREE_TYPE (lhs),
> +                                            check_mask);
> +                 /* Check if CHECK_MASK is a power of two.  */
> +                 ibit = tree_log2 (check_mask);
> +                 if (ibit < 0)
> +                   return;
> +               }
> +
> +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _2 = (int) _1;
> +                _5 = _2 & mask;

(***)

> +                to
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _6 = _1 & mask;
> +                _5 = (int) _6;
> +                and convert
> +                _1 = ~mask_7;
> +                _2 = (unsigned int) _1;
> +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> +                _4 = (int) _3;
> +                _5 = _4 & mask_7;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> +                _12 = _3 & mask_7;
> +                _5 = (int) _12;
> +              */

I wonder if it's better to have the matching part maintained in match.pd

there you could have

(match (atomic_fetch_mask @1 @2 @3 @mask)
 (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))

and here in this code do

extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));

and call it on the _5 from (***) where the function will return true if it
matched and it will set res_ops[] with the positional operands @1 @2
@3 and @mask.

You can add variants and conditions to the same match entry, see match.pd
for examples and also match-and-simplify.texi

> +             replace_uses_by (use_lhs, lhs);
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> +             gimple_assign_set_lhs (use_nop_stmt, var);
> +             gsi = gsi_for_stmt (use_stmt);
> +             gsi_remove (&gsi, true);
> +             release_defs (use_stmt);
> +             gsi_remove (gsip, true);
> +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);

instead of building a GENERIC NOP you could use the

gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);

overload.

> +             gsi = gsi_for_stmt (use_nop_stmt);
> +             g = gimple_build_assign (use_nop_lhs, var);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +             use_stmt = use_nop_stmt;
> +             mask = op_mask;
> +           }
> +       }
> +
> +      if (!bit)
> +       {
> +         if (ibit < 0)
> +           gcc_unreachable ();
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +    }
> +
>    switch (fn)
>      {
>      case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
>      return;
>
> -  mask = gimple_call_arg (call, 1);
>    tree use_lhs = gimple_assign_lhs (use_stmt);
>    if (!use_lhs)
>      return;
>
> -  if (TREE_CODE (mask) == INTEGER_CST)
> +  if (!bit)
>      {
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> -      mask = fold_convert (TREE_TYPE (lhs), mask);
> -      int ibit = tree_log2 (mask);
> -      if (ibit < 0)
> -       return;
> -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> -    }
> -  else if (TREE_CODE (mask) == SSA_NAME)
> -    {
> -      gimple *g = SSA_NAME_DEF_STMT (mask);
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +      if (TREE_CODE (mask) == INTEGER_CST)
>         {
> -         if (!is_gimple_assign (g)
> -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> +         mask = fold_convert (TREE_TYPE (lhs), mask);
> +         int ibit = tree_log2 (mask);
> +         if (ibit < 0)
> +           return;
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +      else if (TREE_CODE (mask) == SSA_NAME)
> +       {
> +         gimple *g = SSA_NAME_DEF_STMT (mask);
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +               return;
> +             mask = gimple_assign_rhs1 (g);
> +             if (TREE_CODE (mask) != SSA_NAME)
> +               return;
> +             g = SSA_NAME_DEF_STMT (mask);
> +           }
> +         if (!is_gimple_assign (g))
>             return;
> -         mask = gimple_assign_rhs1 (g);
> -         if (TREE_CODE (mask) != SSA_NAME)
> +         rhs_code = gimple_assign_rhs_code (g);
> +         if (rhs_code != LSHIFT_EXPR)
> +           {
> +             if (rhs_code != NOP_EXPR)
> +               return;
> +
> +             /* Handle
> +                _1 = 1 << bit_4(D);
> +                mask_5 = (unsigned int) _1;
> +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> +                _3 = _2 & mask_5;
> +                */
> +             tree nop_lhs = gimple_assign_lhs (g);
> +             tree nop_rhs = gimple_assign_rhs1 (g);
> +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> +               return;
> +             g = SSA_NAME_DEF_STMT (nop_rhs);
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> +               return;
> +           }
> +         if (!integer_onep (gimple_assign_rhs1 (g)))
>             return;
> -         g = SSA_NAME_DEF_STMT (mask);
> +         bit = gimple_assign_rhs2 (g);
>         }
> -      if (!is_gimple_assign (g)
> -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> -         || !integer_onep (gimple_assign_rhs1 (g)))
> +      else
>         return;
> -      bit = gimple_assign_rhs2 (g);
> -    }
> -  else
> -    return;
>
> -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> -    {
> -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> +       {
> +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +           return;
> +       }
> +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> +                                   mask, 0))
>         return;
>      }
> -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> -    return;
>
>    bool use_bool = true;
>    bool has_debug_uses = false;
> @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>          of the specified bit after the atomic operation (makes only sense
>          for xor, otherwise the bit content is compile time known),
>          we need to invert the bit.  */
> +      tree mask_convert = mask;
> +      gimple *g_convert = nullptr;
> +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> +       {
> +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> +         g_convert = gimple_build_assign (mask_convert, var);
> +       }
>        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
>                                BIT_XOR_EXPR, new_lhs,
>                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> -                                       : mask);
> +                                       : mask_convert);
>        new_lhs = gimple_assign_lhs (g);

You could use

        gimple_seq stmts = NULL;
        mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
        new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
                                         : mask_convert);

>        if (throws)
>         {
> -         gsi_insert_on_edge_immediate (e, g);

gsi_insert_seq_on_edge_immediate (e, stmts);

to simplify this.  The conversion will be only generated if necessary.

> +         if (g_convert)
> +           {
> +             gsi_insert_on_edge_immediate (e, g_convert);
> +             gsi = gsi_for_stmt (g_convert);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +           }
> +         else
> +           gsi_insert_on_edge_immediate (e, g);
>           gsi = gsi_for_stmt (g);
>         }
>        else
> -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       {
> +         if (g_convert)
> +           {
> +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> +             gsi = gsi_for_stmt (g_convert);
> +           }
> +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       }
>      }
>    if (use_bool && has_debug_uses)
>      {
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-10-13 12:34 ` Richard Biener
@ 2021-10-21 11:15   ` Hongtao Liu
  2021-10-26  8:16     ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Hongtao Liu @ 2021-10-21 11:15 UTC (permalink / raw)
  To: Richard Biener; +Cc: H.J. Lu, Jakub Jelinek, GCC Patches

 i is

On Wed, Oct 13, 2021 at 8:34 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Changes in v4:
> >
> > 1. Bypass redundant check when inputs have been transformed to the
> > equivalent canonical form with valid bit operation.
> >
> > Changes in v3:
> >
> > 1.  Check invalid bit operation.
> >
> > commit adedd5c173388ae505470df152b9cb3947339566
> > Author: Jakub Jelinek <jakub@redhat.com>
> > Date:   Tue May 3 13:37:25 2016 +0200
> >
> >     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
> >
> > optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> > with lock bts/btr/btc by turning
> >
> >   mask_2 = 1 << cnt_1;
> >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> >   _5 = _4 & mask_2;
> >
> > into
> >
> >   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> >   _5 = _4;
> >
> > and
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> >   _3 = _2 & mask_6;
> >   _4 = _3 != 0;
> >
> > into
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> >   _4 = _11 != 0;
> >
> > But it failed to optimize many equivalent, but slightly different cases:
> >
> > 1.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _4 = (_Bool) _1;
> > 2.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _4 = (_Bool) _1;
> > 3.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _7 = ~_1;
> >   _5 = (_Bool) _7;
> > 4.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _7 = ~_1;
> >   _5 = (_Bool) _7;
> > 5.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _2 = (int) _1;
> >   _7 = ~_2;
> >   _5 = (_Bool) _7;
> > 6.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _2 = (int) _1;
> >   _7 = ~_2;
> >   _5 = (_Bool) _7;
> > 7.
> >   _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> >   _2 = (int) _1;
> >   _5 = _2 & mask;
> > 8.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> >   _5 = (signed int) _1;
> >   _4 = _5 < 0;
> > 9.
> >   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> >   _5 = (signed int) _1;
> >   _4 = _5 < 0;
> > 10.
> >   _1 = 1 << bit_4(D);
> >   mask_5 = (unsigned int) _1;
> >   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> >   _3 = _2 & mask_5;
> > 11.
> >   mask_7 = 1 << bit_6(D);
> >   _1 = ~mask_7;
> >   _2 = (unsigned int) _1;
> >   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> >   _4 = (int) _3;
> >   _5 = _4 & mask_7;
> >
> > We make
> >
> >   mask_2 = 1 << cnt_1;
> >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> >   _5 = _4 & mask_2;
> >
> > and
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> >   _3 = _2 & mask_6;
> >   _4 = _3 != 0;
> >
> > the canonical forms for this optimization and transform cases 1-9 to the
> > equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> > before __atomic_fetch_or_4/__atomic_fetch_and_4 with
> >
> >   _1 = 1 << bit_4(D);
> >   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> >   _3 = _2 & _1;
> >
> > and
> >
> >   mask_7 = 1 << bit_6(D);
> >   _1 = ~mask_7;
> >   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> >   _6 = _3 & mask_7;
> >   _5 = (int) _6;
> >
> > gcc/
> >
> >         PR middle-end/102566
> >         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> >         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
> >         different cases to their canonical forms.
> >
> > gcc/testsuite/
> >
> >         PR middle-end/102566
> >         * g++.target/i386/pr102566-1.C: New test.
> >         * g++.target/i386/pr102566-2.C: Likewise.
> >         * g++.target/i386/pr102566-3.C: Likewise.
> >         * g++.target/i386/pr102566-4.C: Likewise.
> >         * g++.target/i386/pr102566-5a.C: Likewise.
> >         * g++.target/i386/pr102566-5b.C: Likewise.
> >         * g++.target/i386/pr102566-6a.C: Likewise.
> >         * g++.target/i386/pr102566-6b.C: Likewise.
> >         * gcc.target/i386/pr102566-1a.c: Likewise.
> >         * gcc.target/i386/pr102566-1b.c: Likewise.
> >         * gcc.target/i386/pr102566-2.c: Likewise.
> >         * gcc.target/i386/pr102566-3a.c: Likewise.
> >         * gcc.target/i386/pr102566-3b.c: Likewise.
> >         * gcc.target/i386/pr102566-4.c: Likewise.
> >         * gcc.target/i386/pr102566-5.c: Likewise.
> >         * gcc.target/i386/pr102566-6.c: Likewise.
> >         * gcc.target/i386/pr102566-7.c: Likewise.
> >         * gcc.target/i386/pr102566-8a.c: Likewise.
> >         * gcc.target/i386/pr102566-8b.c: Likewise.
> >         * gcc.target/i386/pr102566-9a.c: Likewise.
> >         * gcc.target/i386/pr102566-9b.c: Likewise.
> >         * gcc.target/i386/pr102566-10a.c: Likewise.
> >         * gcc.target/i386/pr102566-10b.c: Likewise.
> >         * gcc.target/i386/pr102566-11.c: Likewise.
> >         * gcc.target/i386/pr102566-12.c: Likewise.
> > ---
> >  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
> >  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
> >  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
> >  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
> >  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
> >  26 files changed, 1375 insertions(+), 37 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
> >
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > new file mode 100644
> > index 00000000000..94a66d717cc
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > new file mode 100644
> > index 00000000000..4f2aea961c2
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > new file mode 100644
> > index 00000000000..e88921dd155
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > new file mode 100644
> > index 00000000000..44d1362ac2e
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +typedef int __attribute__ ((mode (__word__))) int_type;
> > +
> > +#define BIT (1 << 0)
> > +
> > +bool
> > +tbit0 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > new file mode 100644
> > index 00000000000..f9595bee2ab
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > new file mode 100644
> > index 00000000000..d917b27a918
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 0)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 30)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 63)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > new file mode 100644
> > index 00000000000..01d495eda23
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > new file mode 100644
> > index 00000000000..adc11fcbf2d
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 0)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 30)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 63)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > new file mode 100644
> > index 00000000000..1c1f86a9659
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > new file mode 100644
> > index 00000000000..0bf39824ea6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic long long int *v, int bit)
> > +{
> > +  long long int mask = 1ll << bit;
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > new file mode 100644
> > index 00000000000..2c8f8c4e59a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +#define MASK 0x1234
> > +
> > +bool
> > +foo1 (_Atomic int *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic unsigned int *v, int mask)
> > +{
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +bool
> > +foo3 (_Atomic unsigned int *v, int mask)
> > +{
> > +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > new file mode 100644
> > index 00000000000..4603a77612c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +#define MASK 0x1234
> > +
> > +bool
> > +foo1 (_Atomic long *v)
> > +{
> > +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic long *v, long mask)
> > +{
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +bool
> > +foo3 (_Atomic long *v, long mask)
> > +{
> > +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "btr" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > new file mode 100644
> > index 00000000000..a915de354e5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > @@ -0,0 +1,188 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +void bar (void);
> > +
> > +__attribute__((noinline, noclone)) int
> > +f1 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f2 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> > +  int t2 = t1 & mask;
> > +  return t2 != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f3 (long int *a, int bit)
> > +{
> > +  long int mask = 1l << bit;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f4 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f5 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f6 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) void
> > +f7 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> > +    bar ();
> > +}
> > +
> > +__attribute__((noinline, noclone)) void
> > +f8 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> > +    bar ();
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f9 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f10 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f11 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f12 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f13 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f14 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f15 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f16 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f17 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f18 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f19 (long int *a, int bit)
> > +{
> > +  long int mask = 1l << bit;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f20 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f21 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_or (a, mask) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f22 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f23 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) short int
> > +f24 (short int *a)
> > +{
> > +  short int mask = 1 << 7;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) short int
> > +f25 (short int *a)
> > +{
> > +  short int mask = 1 << 7;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > new file mode 100644
> > index 00000000000..c4dab8135c7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > @@ -0,0 +1,107 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -g" } */
> > +
> > +int cnt;
> > +
> > +__attribute__((noinline, noclone)) void
> > +bar (void)
> > +{
> > +  cnt++;
> > +}
> > +
> > +#include "pr102566-1a.c"
> > +
> > +int a;
> > +long int b;
> > +unsigned long int c;
> > +unsigned short int d;
> > +
> > +int
> > +main ()
> > +{
> > +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> > +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> > +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> > +    __builtin_abort ();
> > +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> > +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> > +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> > +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> > +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> > +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> > +    __builtin_abort ();
> > +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> > +    __builtin_abort ();
> > +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> > +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (cnt != 0
> > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > +    __builtin_abort ();
> > +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> > +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > +    __builtin_abort ();
> > +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> > +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> > +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> > +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> > +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> > +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > +      || cnt != 2)
> > +    __builtin_abort ();
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > new file mode 100644
> > index 00000000000..00a7c349f2a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > new file mode 100644
> > index 00000000000..8bf1cd6e1bd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > new file mode 100644
> > index 00000000000..d155ed367a1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic long long int *v, int bit)
> > +{
> > +  long long int mask = 1ll << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > new file mode 100644
> > index 00000000000..2668ccf827c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  unsigned int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > new file mode 100644
> > index 00000000000..8bf1cd6e1bd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > new file mode 100644
> > index 00000000000..3dfe55ac683
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > new file mode 100644
> > index 00000000000..6bc0ae0f320
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +typedef int __attribute__ ((mode (__word__))) int_type;
> > +
> > +#define BIT (1 << 0)
> > +
> > +bool
> > +foo0 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> > +}
> > +
> > +bool
> > +foo1 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > new file mode 100644
> > index 00000000000..168e3db78c9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > new file mode 100644
> > index 00000000000..392da3098e0
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 0)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 62)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 63)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > new file mode 100644
> > index 00000000000..3fa2a3ef043
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > new file mode 100644
> > index 00000000000..38ddbdc630f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 0)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 62)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 63)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> > index 70ce6a4d5b8..bb70b87aa5e 100644
> > --- a/gcc/tree-ssa-ccp.c
> > +++ b/gcc/tree-ssa-ccp.c
> > @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
> >    return ret;
> >  }
> >
> > +/* Convert
> > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +   _7 = ~_1;
> > +   _5 = (_Bool) _7;
> > +   to
> > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +   _8 = _1 & 1;
> > +   _5 = _8 == 0;
> > +   and convert
> > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +   _7 = ~_1;
> > +   _4 = (_Bool) _7;
> > +   to
> > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +   _8 = _1 & 1;
> > +   _4 = (_Bool) _8;
> > +
> > +   USE_STMT is the gimple statement which uses the return value of
> > +   __atomic_fetch_or_* or __atomic_fetch_and_*.  LHS is the return value
> > +   of that call.  MASK is the mask passed to the call.
> > + */
> > +
> > +static gimple *
> > +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> > +                       tree lhs, tree mask)
> > +{
> > +  tree and_mask;
> > +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +    {
> > +      /* MASK must be ~1.  */
> > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > +                                          ~HOST_WIDE_INT_1), mask, 0))
> > +       return nullptr;
> > +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +    }
> > +  else
> > +    {
> > +      /* MASK must be 1.  */
> > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> > +       return nullptr;
> > +      and_mask = mask;
> > +    }
> > +
> > +  tree use_lhs = gimple_assign_lhs (use_stmt);
> > +
> > +  use_operand_p use_p;
> > +  gimple *use_not_stmt;
> > +
> > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > +      || !is_gimple_assign (use_not_stmt))
> > +    return nullptr;
> > +
> > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> > +    return nullptr;
> > +
> > +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> > +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> > +    return nullptr;
> > +
> > +  gimple_stmt_iterator gsi;
> > +  gsi = gsi_for_stmt (use_stmt);
> > +  gsi_remove (&gsi, true);
> > +  tree var = make_ssa_name (TREE_TYPE (lhs));
> > +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> > +  gsi = gsi_for_stmt (use_not_stmt);
> > +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> > +  lhs = gimple_assign_lhs (use_not_stmt);
> > +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> > +                                  build_zero_cst (TREE_TYPE (mask)));
> > +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +  gsi = gsi_for_stmt (use_not_stmt);
> > +  gsi_remove (&gsi, true);
> > +  return use_stmt;
> > +}
> > +
> >  /* Optimize
> >       mask_2 = 1 << cnt_1;
> >       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >    tree lhs = gimple_call_lhs (call);
> >    use_operand_p use_p;
> >    gimple *use_stmt;
> > -  tree mask, bit;
> > +  tree mask;
> >    optab optab;
> >
> >    if (!flag_inline_atomics
> > @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> >        || !single_imm_use (lhs, &use_p, &use_stmt)
> >        || !is_gimple_assign (use_stmt)
> > -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> >        || !gimple_vdef (call))
> >      return;
> >
> > +  tree bit = nullptr;
> > +
> > +  mask = gimple_call_arg (call, 1);
> > +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> > +  if (rhs_code != BIT_AND_EXPR)
> > +    {
> > +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> > +       return;
> > +
> > +      tree use_lhs = gimple_assign_lhs (use_stmt);
> > +      if (TREE_CODE (use_lhs) == SSA_NAME
> > +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> > +       return;
> > +
> > +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> > +      if (lhs != use_rhs)
> > +       return;
> > +
> > +      gimple *g;
> > +      gimple_stmt_iterator gsi;
> > +      tree var;
> > +      int ibit = -1;
> > +
> > +      if (rhs_code == BIT_NOT_EXPR)
> > +       {
> > +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> > +         if (!g)
> > +           return;
> > +         use_stmt = g;
> > +         ibit = 0;
> > +       }
> > +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> > +       {
> > +         tree and_mask;
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           {
> > +             /* MASK must be ~1.  */
> > +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > +                                                  ~HOST_WIDE_INT_1),
> > +                                   mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +                _4 = (_Bool) _1;
> > +                to
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +                _5 = _1 & 1;
> > +                _4 = (_Bool) _5;
> > +              */
> > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +           }
> > +         else
> > +           {
> > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +             if (!operand_equal_p (and_mask, mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +                _4 = (_Bool) _1;
> > +                to
> > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +                _5 = _1 & 1;
> > +                _4 = (_Bool) _5;
> > +              */
> > +           }
> > +         var = make_ssa_name (TREE_TYPE (use_rhs));
> > +         replace_uses_by (use_rhs, var);
> > +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > +                                  and_mask);
> > +         gsi = gsi_for_stmt (use_stmt);
> > +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > +         use_stmt = g;
> > +         ibit = 0;
> > +       }
> > +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> > +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> > +       {
> > +         gimple *use_nop_stmt;
> > +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> > +             || !is_gimple_assign (use_nop_stmt))
> > +           return;
> > +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> > +         if (rhs_code != BIT_AND_EXPR)
> > +           {
> > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> > +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> > +               return;
> > +             if (rhs_code == BIT_NOT_EXPR)
> > +               {
> > +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> > +                                             mask);
> > +                 if (!g)
> > +                   return;
> > +                 /* Convert
> > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > +                    _2 = (int) _1;
> > +                    _7 = ~_2;
> > +                    _5 = (_Bool) _7;
> > +                    to
> > +                    _1 = __atomic_fetch_or_4 (ptr_6, ~1, _3);
> > +                    _8 = _1 & 1;
> > +                    _5 = _8 == 0;
> > +                    and convert
> > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > +                    _2 = (int) _1;
> > +                    _7 = ~_2;
> > +                    _5 = (_Bool) _7;
> > +                    to
> > +                    _1 = __atomic_fetch_and_4 (ptr_6, 1, _3);
> > +                    _8 = _1 & 1;
> > +                    _5 = _8 == 0;
> > +                  */
> > +                 gsi = gsi_for_stmt (use_stmt);
> > +                 gsi_remove (&gsi, true);
> > +                 use_stmt = g;
> > +                 ibit = 0;
> > +               }
> > +             else
> > +               {
> > +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> > +                   return;
> > +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> > +                   return;
> > +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > +                 if (use_lhs != cmp_rhs1)
> > +                   return;
> > +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > +                 if (!integer_zerop (cmp_rhs2))
> > +                   return;
> > +
> > +                 tree and_mask;
> > +
> > +                 unsigned HOST_WIDE_INT bytes
> > +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> > +                 ibit = bytes * BITS_PER_UNIT - 1;
> > +                 unsigned HOST_WIDE_INT highest
> > +                   = HOST_WIDE_INT_1U << ibit;
> > +
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   {
> > +                     /* Get the signed maximum of the USE_RHS type.  */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest - 1);
> > +                     if (!operand_equal_p (and_mask, mask, 0))
> > +                       return;
> > +
> > +                     /* Convert
> > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > +                        _5 = (signed int) _1;
> > +                        _4 = _5 < 0 or _5 >= 0;
> > +                        to
> > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > +                        _6 = _1 & 0x80000000;
> > +                        _4 = _6 != 0 or _6 == 0;
> > +                      */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest);
> > +                   }
> > +                 else
> > +                   {
> > +                     /* Get the signed minimum of the USE_RHS type.  */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest);
> > +                     if (!operand_equal_p (and_mask, mask, 0))
> > +                       return;
> > +
> > +                     /* Convert
> > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > +                        _5 = (signed int) _1;
> > +                        _4 = _5 < 0 or _5 >= 0;
> > +                        to
> > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > +                        _6 = _1 & 0x80000000;
> > +                        _4 = _6 != 0 or _6 == 0;
> > +                      */
> > +                   }
> > +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> > +                 gsi = gsi_for_stmt (use_stmt);
> > +                 gsi_remove (&gsi, true);
> > +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > +                                          and_mask);
> > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > +                 use_stmt = g;
> > +                 g = gimple_build_assign (use_nop_lhs,
> > +                                          (rhs_code == GE_EXPR
> > +                                           ? EQ_EXPR : NE_EXPR),
> > +                                          var,
> > +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> > +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > +                 gsi_remove (&gsi, true);
> > +               }
> > +           }
> > +         else
> > +           {
> > +             tree op_mask = mask;
> > +             tree check_mask = op_mask;
> > +             if (TREE_CODE (op_mask) == SSA_NAME)
> > +               {
> > +                 g = SSA_NAME_DEF_STMT (op_mask);
> > +                 if (!is_gimple_assign (g))
> > +                   return;
> > +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> > +                   {
> > +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> > +
> > +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> > +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> > +                       return;
> > +
> > +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> > +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> > +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> > +                       return;
> > +                     op_mask = mask_nop_rhs;
> > +                     check_mask = op_mask;
> > +                     g = SSA_NAME_DEF_STMT (op_mask);
> > +                     if (!is_gimple_assign (g))
> > +                       return;
> > +                   }
> > +
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   {
> > +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +                       return;
> > +                     check_mask = gimple_assign_rhs1 (g);
> > +                     if (TREE_CODE (check_mask) != SSA_NAME)
> > +                       return;
> > +                     g = SSA_NAME_DEF_STMT (check_mask);
> > +                     if (!is_gimple_assign (g))
> > +                       return;
> > +                   }
> > +
> > +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > +                     || !integer_onep (gimple_assign_rhs1 (g)))
> > +                   return;
> > +
> > +                 bit = gimple_assign_rhs2 (g);
> > +               }
> > +
> > +             if (TREE_CODE (check_mask) == INTEGER_CST)
> > +               {
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   check_mask = const_unop (BIT_NOT_EXPR,
> > +                                            TREE_TYPE (check_mask),
> > +                                            check_mask);
> > +                 check_mask = fold_convert (TREE_TYPE (lhs),
> > +                                            check_mask);
> > +                 /* Check if CHECK_MASK is a power of two.  */
> > +                 ibit = tree_log2 (check_mask);
> > +                 if (ibit < 0)
> > +                   return;
> > +               }
> > +
> > +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> > +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > +                _2 = (int) _1;
> > +                _5 = _2 & mask;
>
> (***)
>
> > +                to
> > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > +                _6 = _1 & mask;
> > +                _5 = (int) _6;
> > +                and convert
> > +                _1 = ~mask_7;
> > +                _2 = (unsigned int) _1;
> > +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> > +                _4 = (int) _3;
> > +                _5 = _4 & mask_7;
> > +                to
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> > +                _12 = _3 & mask_7;
> > +                _5 = (int) _12;
> > +              */
>
> I wonder if it's better to maintain to have the matching part of match.pd
I'm trying to rewrite match part in match.pd and find the
canonicalization is ok when mask is constant, but not for variable
since it will be simplified back by
 /* In GIMPLE, getting rid of 2 conversions for one new results
    in smaller IL.  */
 (simplify
  (convert (bitop:cs@2 (nop_convert:s @0) @1))
  (if (GIMPLE
       && TREE_CODE (@1) != INTEGER_CST
       && tree_nop_conversion_p (type, TREE_TYPE (@2))
       && types_match (type, @0))
   (bitop @0 (convert @1)))))

The canonicalization for a variable mask is like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
  _4 = (unsigned int) mask_7
  _6 = _3 & _4
  _5 = (int) _6

and be simplified back.

I've also tried another way of simplification, like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = (unsigned int)mask_7;
  _2 = ~ _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
   _6 = _3 & _1
  _5 = (int) _6

but it's prevented by the check below, since __atomic_fetch_and_4 is not
CONST and we would need to regenerate the call with the updated parameter.

  /* We can't and should not emit calls to non-const functions.  */
  if (!(flags_from_decl_or_type (decl) & ECF_CONST))
    return NULL;

>
> there you could have
>
> (match (atomic_fetch_mask @1 @2 @3 @mask)
>  (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))
>
> and here in this code do
>
> extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));
>
> and call it on the _5 from (***) where the function will return true if it
> matched and it will set res_ops[] with the positional operands @1 @2
> @3 and @mask.
>
> You can add variants and conditions to the same match entry, see match.pd
> for examples and also match-and-simplify.texi
>
> > +             replace_uses_by (use_lhs, lhs);
> > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> > +             gimple_assign_set_lhs (use_nop_stmt, var);
> > +             gsi = gsi_for_stmt (use_stmt);
> > +             gsi_remove (&gsi, true);
> > +             release_defs (use_stmt);
> > +             gsi_remove (gsip, true);
> > +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
>
> instead of building a GENERIC NOP you could use the
>
> gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
>
> overload.
>
> > +             gsi = gsi_for_stmt (use_nop_stmt);
> > +             g = gimple_build_assign (use_nop_lhs, var);
> > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +             use_stmt = use_nop_stmt;
> > +             mask = op_mask;
> > +           }
> > +       }
> > +
> > +      if (!bit)
> > +       {
> > +         if (ibit < 0)
> > +           gcc_unreachable ();
> > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > +       }
> > +    }
> > +
> >    switch (fn)
> >      {
> >      case IFN_ATOMIC_BIT_TEST_AND_SET:
> > @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> >      return;
> >
> > -  mask = gimple_call_arg (call, 1);
> >    tree use_lhs = gimple_assign_lhs (use_stmt);
> >    if (!use_lhs)
> >      return;
> >
> > -  if (TREE_CODE (mask) == INTEGER_CST)
> > +  if (!bit)
> >      {
> > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > -      mask = fold_convert (TREE_TYPE (lhs), mask);
> > -      int ibit = tree_log2 (mask);
> > -      if (ibit < 0)
> > -       return;
> > -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > -    }
> > -  else if (TREE_CODE (mask) == SSA_NAME)
> > -    {
> > -      gimple *g = SSA_NAME_DEF_STMT (mask);
> > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +      if (TREE_CODE (mask) == INTEGER_CST)
> >         {
> > -         if (!is_gimple_assign (g)
> > -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > +         mask = fold_convert (TREE_TYPE (lhs), mask);
> > +         int ibit = tree_log2 (mask);
> > +         if (ibit < 0)
> > +           return;
> > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > +       }
> > +      else if (TREE_CODE (mask) == SSA_NAME)
> > +       {
> > +         gimple *g = SSA_NAME_DEF_STMT (mask);
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           {
> > +             if (!is_gimple_assign (g)
> > +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +               return;
> > +             mask = gimple_assign_rhs1 (g);
> > +             if (TREE_CODE (mask) != SSA_NAME)
> > +               return;
> > +             g = SSA_NAME_DEF_STMT (mask);
> > +           }
> > +         if (!is_gimple_assign (g))
> >             return;
> > -         mask = gimple_assign_rhs1 (g);
> > -         if (TREE_CODE (mask) != SSA_NAME)
> > +         rhs_code = gimple_assign_rhs_code (g);
> > +         if (rhs_code != LSHIFT_EXPR)
> > +           {
> > +             if (rhs_code != NOP_EXPR)
> > +               return;
> > +
> > +             /* Handle
> > +                _1 = 1 << bit_4(D);
> > +                mask_5 = (unsigned int) _1;
> > +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > +                _3 = _2 & mask_5;
> > +                */
> > +             tree nop_lhs = gimple_assign_lhs (g);
> > +             tree nop_rhs = gimple_assign_rhs1 (g);
> > +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> > +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> > +               return;
> > +             g = SSA_NAME_DEF_STMT (nop_rhs);
> > +             if (!is_gimple_assign (g)
> > +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> > +               return;
> > +           }
> > +         if (!integer_onep (gimple_assign_rhs1 (g)))
> >             return;
> > -         g = SSA_NAME_DEF_STMT (mask);
> > +         bit = gimple_assign_rhs2 (g);
> >         }
> > -      if (!is_gimple_assign (g)
> > -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > -         || !integer_onep (gimple_assign_rhs1 (g)))
> > +      else
> >         return;
> > -      bit = gimple_assign_rhs2 (g);
> > -    }
> > -  else
> > -    return;
> >
> > -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> > -    {
> > -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> > +       {
> > +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > +           return;
> > +       }
> > +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> > +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> > +                                   mask, 0))
> >         return;
> >      }
> > -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> > -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> > -    return;
> >
> >    bool use_bool = true;
> >    bool has_debug_uses = false;
> > @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >          of the specified bit after the atomic operation (makes only sense
> >          for xor, otherwise the bit content is compile time known),
> >          we need to invert the bit.  */
> > +      tree mask_convert = mask;
> > +      gimple *g_convert = nullptr;
> > +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> > +       {
> > +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> > +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> > +         g_convert = gimple_build_assign (mask_convert, var);
> > +       }
> >        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> >                                BIT_XOR_EXPR, new_lhs,
> >                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> > -                                       : mask);
> > +                                       : mask_convert);
> >        new_lhs = gimple_assign_lhs (g);
>
> You could use
>
>         gimple_seq stmts = NULL;
>         mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
>         new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
>                                                use_bool ?
> build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
>
> >        if (throws)
> >         {
> > -         gsi_insert_on_edge_immediate (e, g);
>
> gsi_insert_seq_on_edge_immediate (e, stmts);
>
> to simplify this.  The conversion will be only generated if necessary.
>
> > +         if (g_convert)
> > +           {
> > +             gsi_insert_on_edge_immediate (e, g_convert);
> > +             gsi = gsi_for_stmt (g_convert);
> > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +           }
> > +         else
> > +           gsi_insert_on_edge_immediate (e, g);
> >           gsi = gsi_for_stmt (g);
> >         }
> >        else
> > -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +       {
> > +         if (g_convert)
> > +           {
> > +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> > +             gsi = gsi_for_stmt (g_convert);
> > +           }
> > +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +       }
> >      }
> >    if (use_bool && has_debug_uses)
> >      {
> > --
> > 2.31.1
> >



--
BR,
Hongtao

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-10-21 11:15   ` Hongtao Liu
@ 2021-10-26  8:16     ` Richard Biener
  2021-11-04  1:27       ` [PATCH v5] " liuhongt
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-10-26  8:16 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: H.J. Lu, Jakub Jelinek, GCC Patches

On Thu, Oct 21, 2021 at 1:09 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
>  i is
>
> On Wed, Oct 13, 2021 at 8:34 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Changes in v4:
> > >
> > > 1. Bypass redundant check when inputs have been transformed to the
> > > equivalent canonical form with valid bit operation.
> > >
> > > Changes in v3:
> > >
> > > 1.  Check invalid bit operation.
> > >
> > > commit adedd5c173388ae505470df152b9cb3947339566
> > > Author: Jakub Jelinek <jakub@redhat.com>
> > > Date:   Tue May 3 13:37:25 2016 +0200
> > >
> > >     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
> > >
> > > optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> > > with lock bts/btr/btc by turning
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > into
> > >
> > >   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> > >   _5 = _4;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > into
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> > >   _4 = _11 != 0;
> > >
> > > But it failed to optimize many equivalent, but slighly different cases:
> > >
> > > 1.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _4 = (_Bool) _1;
> > > 2.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _4 = (_Bool) _1;
> > > 3.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 4.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 5.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 6.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 7.
> > >   _1 = _atomic_fetch_or_4 (ptr_6, mask, _3);
> > >   _2 = (int) _1;
> > >   _5 = _2 & mask;
> > > 8.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 9.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 10.
> > >   _1 = 1 << bit_4(D);
> > >   mask_5 = (unsigned int) _1;
> > >   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > >   _3 = _2 & mask_5;
> > > 11.
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _2 = (unsigned int) _1;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> > >   _4 = (int) _3;
> > >   _5 = _4 & mask_7;
> > >
> > > We make
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > the canonical forms for this optimization and transform cases 1-9 to the
> > > equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> > > before __atomic_fetch_or_4/__atomic_fetch_and_4 with
> > >
> > >   _1 = 1 << bit_4(D);
> > >   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> > >   _3 = _2 & _1;
> > >
> > > and
> > >
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> > >   _6 = _3 & mask_7;
> > >   _5 = (int) _6;
> > >
> > > gcc/
> > >
> > >         PR middle-end/102566
> > >         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> > >         (optimize_atomic_bit_test_and): Transform equivalent, but slighly
> > >         different cases to their canonical forms.
> > >
> > > gcc/testsuite/
> > >
> > >         PR middle-end/102566
> > >         * g++.target/i386/pr102566-1.C: New test.
> > >         * g++.target/i386/pr102566-2.C: Likewise.
> > >         * g++.target/i386/pr102566-3.C: Likewise.
> > >         * g++.target/i386/pr102566-4.C: Likewise.
> > >         * g++.target/i386/pr102566-5a.C: Likewise.
> > >         * g++.target/i386/pr102566-5b.C: Likewise.
> > >         * g++.target/i386/pr102566-6a.C: Likewise.
> > >         * g++.target/i386/pr102566-6b.C: Likewise.
> > >         * gcc.target/i386/pr102566-1a.c: Likewise.
> > >         * gcc.target/i386/pr102566-1b.c: Likewise.
> > >         * gcc.target/i386/pr102566-2.c: Likewise.
> > >         * gcc.target/i386/pr102566-3a.c: Likewise.
> > >         * gcc.target/i386/pr102566-3b.c: Likewise.
> > >         * gcc.target/i386/pr102566-4.c: Likewise.
> > >         * gcc.target/i386/pr102566-5.c: Likewise.
> > >         * gcc.target/i386/pr102566-6.c: Likewise.
> > >         * gcc.target/i386/pr102566-7.c: Likewise.
> > >         * gcc.target/i386/pr102566-8a.c: Likewise.
> > >         * gcc.target/i386/pr102566-8b.c: Likewise.
> > >         * gcc.target/i386/pr102566-9a.c: Likewise.
> > >         * gcc.target/i386/pr102566-9b.c: Likewise.
> > >         * gcc.target/i386/pr102566-10a.c: Likewise.
> > >         * gcc.target/i386/pr102566-10b.c: Likewise.
> > >         * gcc.target/i386/pr102566-11.c: Likewise.
> > >         * gcc.target/i386/pr102566-12.c: Likewise.
> > > ---
> > >  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
> > >  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
> > >  26 files changed, 1375 insertions(+), 37 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > >
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > new file mode 100644
> > > index 00000000000..94a66d717cc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > new file mode 100644
> > > index 00000000000..4f2aea961c2
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > new file mode 100644
> > > index 00000000000..e88921dd155
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > new file mode 100644
> > > index 00000000000..44d1362ac2e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > new file mode 100644
> > > index 00000000000..f9595bee2ab
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > new file mode 100644
> > > index 00000000000..d917b27a918
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > new file mode 100644
> > > index 00000000000..01d495eda23
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > new file mode 100644
> > > index 00000000000..adc11fcbf2d
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > new file mode 100644
> > > index 00000000000..1c1f86a9659
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > new file mode 100644
> > > index 00000000000..0bf39824ea6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > new file mode 100644
> > > index 00000000000..2c8f8c4e59a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic int *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > new file mode 100644
> > > index 00000000000..4603a77612c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic long *v)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic long *v, long mask)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic long *v, long mask)
> > > +{
> > > +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "btr" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > new file mode 100644
> > > index 00000000000..a915de354e5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > @@ -0,0 +1,188 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +void bar (void);
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f1 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f2 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> > > +  int t2 = t1 & mask;
> > > +  return t2 != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f3 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f4 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f5 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f6 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f7 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f8 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f9 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f10 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f11 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f12 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f13 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f14 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f15 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f16 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f17 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f18 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f19 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f20 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f21 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f22 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f23 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f24 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f25 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > new file mode 100644
> > > index 00000000000..c4dab8135c7
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > @@ -0,0 +1,107 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -g" } */
> > > +
> > > +int cnt;
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +bar (void)
> > > +{
> > > +  cnt++;
> > > +}
> > > +
> > > +#include "pr102566-1a.c"
> > > +
> > > +int a;
> > > +long int b;
> > > +unsigned long int c;
> > > +unsigned short int d;
> > > +
> > > +int
> > > +main ()
> > > +{
> > > +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> > > +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> > > +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> > > +    __builtin_abort ();
> > > +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> > > +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> > > +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> > > +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> > > +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> > > +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> > > +    __builtin_abort ();
> > > +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> > > +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (cnt != 0
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > > +    __builtin_abort ();
> > > +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> > > +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > > +    __builtin_abort ();
> > > +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> > > +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> > > +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> > > +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> > > +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> > > +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || cnt != 2)
> > > +    __builtin_abort ();
> > > +  return 0;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > new file mode 100644
> > > index 00000000000..00a7c349f2a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > new file mode 100644
> > > index 00000000000..d155ed367a1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > new file mode 100644
> > > index 00000000000..2668ccf827c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  unsigned int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > new file mode 100644
> > > index 00000000000..3dfe55ac683
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > new file mode 100644
> > > index 00000000000..6bc0ae0f320
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > @@ -0,0 +1,30 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +foo0 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +foo1 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > new file mode 100644
> > > index 00000000000..168e3db78c9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > new file mode 100644
> > > index 00000000000..392da3098e0
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > new file mode 100644
> > > index 00000000000..3fa2a3ef043
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > new file mode 100644
> > > index 00000000000..38ddbdc630f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> > > index 70ce6a4d5b8..bb70b87aa5e 100644
> > > --- a/gcc/tree-ssa-ccp.c
> > > +++ b/gcc/tree-ssa-ccp.c
> > > @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
> > >    return ret;
> > >  }
> > >
> > > +/* Convert
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _7 = ~_1;
> > > +   _5 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _8 = _1 & 1;
> > > +   _5 = _8 == 0;
> > > +   and convert
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _7 = ~_1;
> > > +   _4 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _8 = _1 & 1;
> > > +   _4 = (_Bool) _8;
> > > +
> > > +   USE_STMT is the gimple statement which uses the return value of
> > > +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> > > +   MASK is the mask passed to __atomic_fetch_or_*.
> > > + */
> > > +
> > > +static gimple *
> > > +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> > > +                       tree lhs, tree mask)
> > > +{
> > > +  tree and_mask;
> > > +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +    {
> > > +      /* MASK must be ~1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                          ~HOST_WIDE_INT_1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +    }
> > > +  else
> > > +    {
> > > +      /* MASK must be 1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = mask;
> > > +    }
> > > +
> > > +  tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +
> > > +  use_operand_p use_p;
> > > +  gimple *use_not_stmt;
> > > +
> > > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > > +      || !is_gimple_assign (use_not_stmt))
> > > +    return nullptr;
> > > +
> > > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> > > +    return nullptr;
> > > +
> > > +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> > > +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> > > +    return nullptr;
> > > +
> > > +  gimple_stmt_iterator gsi;
> > > +  gsi = gsi_for_stmt (use_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  tree var = make_ssa_name (TREE_TYPE (lhs));
> > > +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> > > +  lhs = gimple_assign_lhs (use_not_stmt);
> > > +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> > > +                                  build_zero_cst (TREE_TYPE (mask)));
> > > +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  return use_stmt;
> > > +}
> > > +
> > >  /* Optimize
> > >       mask_2 = 1 << cnt_1;
> > >       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > > @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >    tree lhs = gimple_call_lhs (call);
> > >    use_operand_p use_p;
> > >    gimple *use_stmt;
> > > -  tree mask, bit;
> > > +  tree mask;
> > >    optab optab;
> > >
> > >    if (!flag_inline_atomics
> > > @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> > >        || !single_imm_use (lhs, &use_p, &use_stmt)
> > >        || !is_gimple_assign (use_stmt)
> > > -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> > >        || !gimple_vdef (call))
> > >      return;
> > >
> > > +  tree bit = nullptr;
> > > +
> > > +  mask = gimple_call_arg (call, 1);
> > > +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> > > +  if (rhs_code != BIT_AND_EXPR)
> > > +    {
> > > +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> > > +       return;
> > > +
> > > +      tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +      if (TREE_CODE (use_lhs) == SSA_NAME
> > > +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> > > +       return;
> > > +
> > > +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> > > +      if (lhs != use_rhs)
> > > +       return;
> > > +
> > > +      gimple *g;
> > > +      gimple_stmt_iterator gsi;
> > > +      tree var;
> > > +      int ibit = -1;
> > > +
> > > +      if (rhs_code == BIT_NOT_EXPR)
> > > +       {
> > > +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> > > +         if (!g)
> > > +           return;
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> > > +       {
> > > +         tree and_mask;
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             /* MASK must be ~1.  */
> > > +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                                  ~HOST_WIDE_INT_1),
> > > +                                   mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +           }
> > > +         else
> > > +           {
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +             if (!operand_equal_p (and_mask, mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +           }
> > > +         var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +         replace_uses_by (use_rhs, var);
> > > +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                  and_mask);
> > > +         gsi = gsi_for_stmt (use_stmt);
> > > +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> > > +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> > > +       {
> > > +         gimple *use_nop_stmt;
> > > +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> > > +             || !is_gimple_assign (use_nop_stmt))
> > > +           return;
> > > +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> > > +         if (rhs_code != BIT_AND_EXPR)
> > > +           {
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> > > +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> > > +               return;
> > > +             if (rhs_code == BIT_NOT_EXPR)
> > > +               {
> > > +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> > > +                                             mask);
> > > +                 if (!g)
> > > +                   return;
> > > +                 /* Convert
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                    and convert
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                  */
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 use_stmt = g;
> > > +                 ibit = 0;
> > > +               }
> > > +             else
> > > +               {
> > > +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> > > +                   return;
> > > +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> > > +                   return;
> > > +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +                 if (use_lhs != cmp_rhs1)
> > > +                   return;
> > > +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +                 if (!integer_zerop (cmp_rhs2))
> > > +                   return;
> > > +
> > > +                 tree and_mask;
> > > +
> > > +                 unsigned HOST_WIDE_INT bytes
> > > +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> > > +                 ibit = bytes * BITS_PER_UNIT - 1;
> > > +                 unsigned HOST_WIDE_INT highest
> > > +                   = HOST_WIDE_INT_1U << ibit;
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     /* Get the signed maximum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest - 1);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                   }
> > > +                 else
> > > +                   {
> > > +                     /* Get the signed minimum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                   }
> > > +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                          and_mask);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +                 use_stmt = g;
> > > +                 g = gimple_build_assign (use_nop_lhs,
> > > +                                          (rhs_code == GE_EXPR
> > > +                                           ? EQ_EXPR : NE_EXPR),
> > > +                                          var,
> > > +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> > > +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +               }
> > > +           }
> > > +         else
> > > +           {
> > > +             tree op_mask = mask;
> > > +             tree check_mask = op_mask;
> > > +             if (TREE_CODE (op_mask) == SSA_NAME)
> > > +               {
> > > +                 g = SSA_NAME_DEF_STMT (op_mask);
> > > +                 if (!is_gimple_assign (g))
> > > +                   return;
> > > +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> > > +                   {
> > > +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> > > +
> > > +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> > > +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> > > +                       return;
> > > +
> > > +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> > > +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> > > +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> > > +                       return;
> > > +                     op_mask = mask_nop_rhs;
> > > +                     check_mask = op_mask;
> > > +                     g = SSA_NAME_DEF_STMT (op_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +                       return;
> > > +                     check_mask = gimple_assign_rhs1 (g);
> > > +                     if (TREE_CODE (check_mask) != SSA_NAME)
> > > +                       return;
> > > +                     g = SSA_NAME_DEF_STMT (check_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > +                     || !integer_onep (gimple_assign_rhs1 (g)))
> > > +                   return;
> > > +
> > > +                 bit = gimple_assign_rhs2 (g);
> > > +               }
> > > +
> > > +             if (TREE_CODE (check_mask) == INTEGER_CST)
> > > +               {
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   check_mask = const_unop (BIT_NOT_EXPR,
> > > +                                            TREE_TYPE (check_mask),
> > > +                                            check_mask);
> > > +                 check_mask = fold_convert (TREE_TYPE (lhs),
> > > +                                            check_mask);
> > > +                 /* Check if CHECK_MASK is a power of two.  */
> > > +                 ibit = tree_log2 (check_mask);
> > > +                 if (ibit < 0)
> > > +                   return;
> > > +               }
> > > +
> > > +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> > > +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _2 = (int) _1;
> > > +                _5 = _2 & mask;
> >
> > (***)
> >
> > > +                to
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _6 = _1 & mask;
> > > +                _5 = (int) _6;
> > > +                and convert
> > > +                _1 = ~mask_7;
> > > +                _2 = (unsigned int) _1;
> > > +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> > > +                _4 = (int) _3;
> > > +                _5 = _4 & mask_7;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> > > +                _12 = _1 & mask_7;
> > > +                _5 = (int) _12;
> > > +              */
> >
> > I wonder if it's better to maintain to have the matching part of match.pd
> I'm trying to rewrite match part in match.pd and find the
> canonicalization is ok when mask is constant, but not for variable
> since it will be simplified back by

Note I didn't suggest to use (simplify (....)) but instead use

(match (...))

you can look at the ctz_table_index example and how it is used from
tree-ssa-forwprop.c as gimple_ctz_table_index ().  With such way you
can replace the boiler-plates for matching expressions.  You can
match multiple related forms (when the "leafs" have the same structure)
by multiple (match instances with the same name, see for example
'nop_convert'.

>  /* In GIMPLE, getting rid of 2 conversions for one new results
>     in smaller IL.  */
>  (simplify
>   (convert (bitop:cs@2 (nop_convert:s @0) @1))
>   (if (GIMPLE
>        && TREE_CODE (@1) != INTEGER_CST
>        && tree_nop_conversion_p (type, TREE_TYPE (@2))
>        && types_match (type, @0))
>    (bitop @0 (convert @1)))))
>
> The canonicalization for a variable mask is like
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>   _4 = (unsigned int) mask_7
>   _6 = _3 & _4
>   _5 = (int) _6
>
> and be simplified back.
>
> I've also tried another way of simplication like
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = (unsigned int)mask_7;
>   _2 = ~ _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>    _6 = _3 & _1
>   _5 = (int) _6
>
> but it's prevented by the check below since __atomic_fetch_and_4 is not CONST, but
> we need to regenerate it with updated parameter.
>
>   /* We can't and should not emit calls to non-const functions.  */
>   if (!(flags_from_decl_or_type (decl) & ECF_CONST))
>     return NULL;
>
> >
> > there you could have
> >
> > (match (atomic_fetch_mask @1 @2 @3 @mask)
> >  (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))
> >
> > and here in this code do
> >
> > extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));
> >
> > and call it on the _5 from (***) where the function will return true if it
> > matched and it will set res_ops[] with the positional operands @1 @2
> > @3 and @mask.
> >
> > You can add variants and conditions to the same match entry, see match.pd
> > for examples and also match-and-simplify.texi
> >
> > > +             replace_uses_by (use_lhs, lhs);
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> > > +             gimple_assign_set_lhs (use_nop_stmt, var);
> > > +             gsi = gsi_for_stmt (use_stmt);
> > > +             gsi_remove (&gsi, true);
> > > +             release_defs (use_stmt);
> > > +             gsi_remove (gsip, true);
> > > +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
> >
> > instead of building a GENERIC NOP you could use the
> >
> > gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
> >
> > overload.
> >
> > > +             gsi = gsi_for_stmt (use_nop_stmt);
> > > +             g = gimple_build_assign (use_nop_lhs, var);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +             use_stmt = use_nop_stmt;
> > > +             mask = op_mask;
> > > +           }
> > > +       }
> > > +
> > > +      if (!bit)
> > > +       {
> > > +         if (ibit < 0)
> > > +           gcc_unreachable ();
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +    }
> > > +
> > >    switch (fn)
> > >      {
> > >      case IFN_ATOMIC_BIT_TEST_AND_SET:
> > > @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> > >      return;
> > >
> > > -  mask = gimple_call_arg (call, 1);
> > >    tree use_lhs = gimple_assign_lhs (use_stmt);
> > >    if (!use_lhs)
> > >      return;
> > >
> > > -  if (TREE_CODE (mask) == INTEGER_CST)
> > > +  if (!bit)
> > >      {
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > -      mask = fold_convert (TREE_TYPE (lhs), mask);
> > > -      int ibit = tree_log2 (mask);
> > > -      if (ibit < 0)
> > > -       return;
> > > -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > -    }
> > > -  else if (TREE_CODE (mask) == SSA_NAME)
> > > -    {
> > > -      gimple *g = SSA_NAME_DEF_STMT (mask);
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +      if (TREE_CODE (mask) == INTEGER_CST)
> > >         {
> > > -         if (!is_gimple_assign (g)
> > > -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > +         mask = fold_convert (TREE_TYPE (lhs), mask);
> > > +         int ibit = tree_log2 (mask);
> > > +         if (ibit < 0)
> > > +           return;
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +      else if (TREE_CODE (mask) == SSA_NAME)
> > > +       {
> > > +         gimple *g = SSA_NAME_DEF_STMT (mask);
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +               return;
> > > +             mask = gimple_assign_rhs1 (g);
> > > +             if (TREE_CODE (mask) != SSA_NAME)
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (mask);
> > > +           }
> > > +         if (!is_gimple_assign (g))
> > >             return;
> > > -         mask = gimple_assign_rhs1 (g);
> > > -         if (TREE_CODE (mask) != SSA_NAME)
> > > +         rhs_code = gimple_assign_rhs_code (g);
> > > +         if (rhs_code != LSHIFT_EXPR)
> > > +           {
> > > +             if (rhs_code != NOP_EXPR)
> > > +               return;
> > > +
> > > +             /* Handle
> > > +                _1 = 1 << bit_4(D);
> > > +                mask_5 = (unsigned int) _1;
> > > +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > > +                _3 = _2 & mask_5;
> > > +                */
> > > +             tree nop_lhs = gimple_assign_lhs (g);
> > > +             tree nop_rhs = gimple_assign_rhs1 (g);
> > > +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> > > +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (nop_rhs);
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> > > +               return;
> > > +           }
> > > +         if (!integer_onep (gimple_assign_rhs1 (g)))
> > >             return;
> > > -         g = SSA_NAME_DEF_STMT (mask);
> > > +         bit = gimple_assign_rhs2 (g);
> > >         }
> > > -      if (!is_gimple_assign (g)
> > > -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > -         || !integer_onep (gimple_assign_rhs1 (g)))
> > > +      else
> > >         return;
> > > -      bit = gimple_assign_rhs2 (g);
> > > -    }
> > > -  else
> > > -    return;
> > >
> > > -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > -    {
> > > -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > +       {
> > > +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +           return;
> > > +       }
> > > +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> > > +                                   mask, 0))
> > >         return;
> > >      }
> > > -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> > > -    return;
> > >
> > >    bool use_bool = true;
> > >    bool has_debug_uses = false;
> > > @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >          of the specified bit after the atomic operation (makes only sense
> > >          for xor, otherwise the bit content is compile time known),
> > >          we need to invert the bit.  */
> > > +      tree mask_convert = mask;
> > > +      gimple *g_convert = nullptr;
> > > +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> > > +       {
> > > +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> > > +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> > > +         g_convert = gimple_build_assign (mask_convert, var);
> > > +       }
> > >        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> > >                                BIT_XOR_EXPR, new_lhs,
> > >                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> > > -                                       : mask);
> > > +                                       : mask_convert);
> > >        new_lhs = gimple_assign_lhs (g);
> >
> > You could use
> >
> >         gimple_seq stmts = NULL;
> >         mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> >         new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> >                                                use_bool ?
> > build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
> >
> > >        if (throws)
> > >         {
> > > -         gsi_insert_on_edge_immediate (e, g);
> >
> > gsi_insert_seq_on_edge_immediate (e, stmts);
> >
> > to simplify this.  The conversion will be only generated if necessary.
> >
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_on_edge_immediate (e, g_convert);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +           }
> > > +         else
> > > +           gsi_insert_on_edge_immediate (e, g);
> > >           gsi = gsi_for_stmt (g);
> > >         }
> > >        else
> > > -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       {
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +           }
> > > +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       }
> > >      }
> > >    if (use_bool && has_debug_uses)
> > >      {
> > > --
> > > 2.31.1
> > >
>
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v5] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-10-26  8:16     ` Richard Biener
@ 2021-11-04  1:27       ` liuhongt
  2021-11-09 12:48         ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: liuhongt @ 2021-11-04  1:27 UTC (permalink / raw)
  To: gcc-patches

Sorry for the slow reply.
Here is the updated patch according to the review comments:
1. Define new match function in match.pd.
2. Adjust code for below
   >> +             gsi_remove (gsip, true);
   >> +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
   >
   >instead of building a GENERIC NOP you could use the
   >
   >gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
   >
   >overload.
   >You could use
   >
   >        gimple_seq stmts = NULL;
   >        mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
   >        new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
   >                                               use_bool ?
   >build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
   >
   >>        if (throws)
   >>         {
   >> -         gsi_insert_on_edge_immediate (e, g);
   >
   >gsi_insert_seq_on_edge_immediate (e, stmts);
   >
   >to simplify this.  The conversion will be only generated if necessary.

Bootstrapped and regtest on x86-64-pc-linux-gnu{-m32,}
Ok for trunk?

Improve integer bit test on __atomic_fetch_[or|and]_* returns

commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue May 3 13:37:25 2016 +0200

    re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

into

  _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
  _5 = _4;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

into

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
  _4 = _11 != 0;

But it failed to optimize many equivalent, but slightly different cases:

1.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _4 = (_Bool) _1;
2.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _4 = (_Bool) _1;
3.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
4.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
5.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
6.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
7.
  _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
8.
  _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
9.
  _1 = 1 << bit_4(D);
  mask_5 = (unsigned int) _1;
  _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
  _3 = _2 & mask_5;
10.
  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
  _4 = (int) _3;
  _5 = _4 & mask_7;

We make

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

the canonical forms for this optimization and transform cases 1-8 to the
equivalent canonical form.  For cases 9 and 10, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4 with

  _1 = 1 << bit_4(D);
  _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
  _3 = _2 & _1;

and

  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
  _6 = _3 & mask_7;
  _5 = (int) _6;

2021-11-04  H.J. Lu  <hongjiu.lu@intel.com>
	    Hongtao Liu  <hongtao.liu@intel.com>
gcc/

	PR middle-end/102566
	* match.pd (nop_atomic_bit_test_and_p): New match.
	* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
	(gimple_nop_atomic_bit_test_and_p): New prototype.
	(optimize_atomic_bit_test_and): Transform equivalent, but slightly
	different cases to their canonical forms.

gcc/testsuite/

	PR middle-end/102566
	* g++.target/i386/pr102566-1.C: New test.
	* g++.target/i386/pr102566-2.C: Likewise.
	* g++.target/i386/pr102566-3.C: Likewise.
	* g++.target/i386/pr102566-4.C: Likewise.
	* g++.target/i386/pr102566-5a.C: Likewise.
	* g++.target/i386/pr102566-5b.C: Likewise.
	* g++.target/i386/pr102566-6a.C: Likewise.
	* g++.target/i386/pr102566-6b.C: Likewise.
	* gcc.target/i386/pr102566-1a.c: Likewise.
	* gcc.target/i386/pr102566-1b.c: Likewise.
	* gcc.target/i386/pr102566-2.c: Likewise.
	* gcc.target/i386/pr102566-3a.c: Likewise.
	* gcc.target/i386/pr102566-3b.c: Likewise.
	* gcc.target/i386/pr102566-4.c: Likewise.
	* gcc.target/i386/pr102566-5.c: Likewise.
	* gcc.target/i386/pr102566-6.c: Likewise.
	* gcc.target/i386/pr102566-7.c: Likewise.
	* gcc.target/i386/pr102566-8a.c: Likewise.
	* gcc.target/i386/pr102566-8b.c: Likewise.
	* gcc.target/i386/pr102566-9a.c: Likewise.
	* gcc.target/i386/pr102566-9b.c: Likewise.
	* gcc.target/i386/pr102566-10a.c: Likewise.
	* gcc.target/i386/pr102566-10b.c: Likewise.
	* gcc.target/i386/pr102566-11.c: Likewise.
	* gcc.target/i386/pr102566-12.c: Likewise.
	* gcc.target/i386/pr102566-13.c: New test.
	* gcc.target/i386/pr102566-14.c: New test.
---
 gcc/match.pd                                 | 125 +++++
 gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
 gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
 gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-13.c  |  66 +++
 gcc/testsuite/gcc.target/i386/pr102566-14.c  |  65 +++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 ++++++++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 +++++
 gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
 gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
 gcc/tree-ssa-ccp.c                           | 452 +++++++++++++++++--
 29 files changed, 1575 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 0734c45700c..7888401be02 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -104,6 +104,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (define_operator_list COND_TERNARY
   IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
 
+/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_*  */
+(define_operator_list ATOMIC_FETCH_OR_XOR_N
+  BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
+  BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
+  BUILT_IN_ATOMIC_FETCH_OR_16
+  BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
+  BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
+  BUILT_IN_ATOMIC_FETCH_XOR_16
+  BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
+  BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
+  BUILT_IN_ATOMIC_XOR_FETCH_16)
+/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_*  */
+(define_operator_list SYNC_FETCH_OR_XOR_N
+  BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
+  BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
+  BUILT_IN_SYNC_FETCH_AND_OR_16
+  BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
+  BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
+  BUILT_IN_SYNC_FETCH_AND_XOR_16
+  BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
+  BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
+  BUILT_IN_SYNC_XOR_AND_FETCH_16)
+/* __atomic_fetch_and_*.  */
+(define_operator_list ATOMIC_FETCH_AND_N
+  BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
+  BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
+  BUILT_IN_ATOMIC_FETCH_AND_16)
+/* __sync_fetch_and_and_*.  */
+(define_operator_list SYNC_FETCH_AND_AND_N
+  BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
+  BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
+  BUILT_IN_SYNC_FETCH_AND_AND_16)
+
 /* With nop_convert? combine convert? and view_convert? in one pattern
    plus conditionalize on tree_nop_conversion_p conversions.  */
 (match (nop_convert @0)
@@ -3931,6 +3964,98 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (op! @3 @1) (op! @3 @2))))
 #endif
 
+#if GIMPLE
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+	    INTEGER_CST@1)
+ (with {
+	 int ibit = tree_log2 (@0);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (single_use (@4)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+      && ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+	    INTEGER_CST@1)
+ (with {
+	 int ibit = tree_log2 (@0);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (single_use (@3)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+      && ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+  (nop_convert?@4
+   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
+  @1)
+ (if (single_use (@4)
+     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+     && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+  (nop_convert?@4
+   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
+  @1)
+ (if (single_use (@4)
+     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+     && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+	      INTEGER_CST@1)
+ (with {
+	 tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
+	 mask = fold_convert (TREE_TYPE (@4), mask);
+	 int ibit = tree_log2 (mask);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (single_use (@3)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+      && ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c@4
+  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+  INTEGER_CST@1)
+ (with {
+	 tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
+	 mask = fold_convert (TREE_TYPE (@4), mask);
+	 int ibit = tree_log2 (mask);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (single_use (@3)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+      && ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+  (nop_convert?@3
+   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
+   @1)
+ (if (single_use (@3)
+     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+    && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+  (nop_convert?@3
+   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
+   @1)
+ (if (single_use (@3)
+     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+     && operand_equal_p (@0, @1))))
+
+#endif
+
 /* (v ? w : 0) ? a : b is just (v & w) ? a : b
    Currently disabled after pass lvec because ARM understands
    VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR.  */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
new file mode 100644
index 00000000000..94a66d717cc
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
new file mode 100644
index 00000000000..4f2aea961c2
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
new file mode 100644
index 00000000000..e88921dd155
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
new file mode 100644
index 00000000000..44d1362ac2e
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
@@ -0,0 +1,29 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+tbit0 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
+}
+
+bool
+tbit30 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
+}
+
+bool
+tbit31 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
new file mode 100644
index 00000000000..f9595bee2ab
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
new file mode 100644
index 00000000000..d917b27a918
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
new file mode 100644
index 00000000000..01d495eda23
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
new file mode 100644
index 00000000000..adc11fcbf2d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
new file mode 100644
index 00000000000..1c1f86a9659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
new file mode 100644
index 00000000000..0bf39824ea6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
new file mode 100644
index 00000000000..2c8f8c4e59a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic int *v)
+{
+  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic unsigned int *v, int mask)
+{
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic unsigned int *v, int mask)
+{
+  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
new file mode 100644
index 00000000000..4603a77612c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic long *v)
+{
+  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic long *v, long mask)
+{
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic long *v, long mask)
+{
+  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "btr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-13.c b/gcc/testsuite/gcc.target/i386/pr102566-13.c
new file mode 100644
index 00000000000..2657a2f62ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-13.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(TYPE,MASK)							\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(short, 0);
+FOO(short, 7);
+FOO(short, 15);
+FOO(int, 0);
+FOO(int, 15);
+FOO(int, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-14.c b/gcc/testsuite/gcc.target/i386/pr102566-14.c
new file mode 100644
index 00000000000..24681c1da18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-14.c
@@ -0,0 +1,65 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(TYPE,MASK)							\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+
+FOO(int64, 0);
+FOO(int64, 32);
+FOO(int64, 63);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
new file mode 100644
index 00000000000..a915de354e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+  int t2 = t1 & mask;
+  return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
new file mode 100644
index 00000000000..c4dab8135c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+    __builtin_abort ();
+  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+    __builtin_abort ();
+  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+    __builtin_abort ();
+  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+    __builtin_abort ();
+  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (cnt != 0
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+    __builtin_abort ();
+  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || cnt != 2)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
new file mode 100644
index 00000000000..00a7c349f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
new file mode 100644
index 00000000000..d155ed367a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
new file mode 100644
index 00000000000..2668ccf827c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  unsigned int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
new file mode 100644
index 00000000000..3dfe55ac683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
new file mode 100644
index 00000000000..6bc0ae0f320
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+foo0 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
+}
+
+bool
+foo1 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
+}
+
+bool
+foo2 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
new file mode 100644
index 00000000000..168e3db78c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
new file mode 100644
index 00000000000..392da3098e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
new file mode 100644
index 00000000000..3fa2a3ef043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
new file mode 100644
index 00000000000..38ddbdc630f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 70ce6a4d5b8..d14774549b8 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
   return ret;
 }
 
+/* Convert
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _7 = ~_1;
+   _5 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _8 = _1 & 1;
+   _5 = _8 == 0;
+   and convert
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _7 = ~_1;
+   _4 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _8 = _1 & 1;
+   _4 = (_Bool) _8;
+
+   USE_STMT is the gimple statement which uses the return value of
+   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
+   MASK is the mask passed to __atomic_fetch_or_*.
+ */
+
+static gimple *
+convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
+			tree lhs, tree mask)
+{
+  tree and_mask;
+  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+    {
+      /* MASK must be ~1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+					   ~HOST_WIDE_INT_1), mask, 0))
+	return nullptr;
+      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+    }
+  else
+    {
+      /* MASK must be 1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
+	return nullptr;
+      and_mask = mask;
+    }
+
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+
+  use_operand_p use_p;
+  gimple *use_not_stmt;
+
+  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
+      || !is_gimple_assign (use_not_stmt))
+    return nullptr;
+
+  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
+    return nullptr;
+
+  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
+  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
+    return nullptr;
+
+  gimple_stmt_iterator gsi;
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  tree var = make_ssa_name (TREE_TYPE (lhs));
+  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
+  lhs = gimple_assign_lhs (use_not_stmt);
+  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
+				   build_zero_cst (TREE_TYPE (mask)));
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_remove (&gsi, true);
+  return use_stmt;
+}
+
+/* match.pd function to match atomic_bit_test_and pattern which
+   has nop_convert:
+     _1 = __atomic_fetch_or_4 (&v, 1, 0);
+     _2 = (int) _1;
+     _5 = _2 & 1;
+ */
+extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
+					      tree (*) (tree));
+
 /* Optimize
      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
@@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   tree lhs = gimple_call_lhs (call);
   use_operand_p use_p;
   gimple *use_stmt;
-  tree mask, bit;
+  tree mask;
   optab optab;
 
   if (!flag_inline_atomics
@@ -3279,10 +3363,267 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
-      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
       || !gimple_vdef (call))
     return;
 
+  tree bit = nullptr;
+
+  mask = gimple_call_arg (call, 1);
+  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+  if (rhs_code != BIT_AND_EXPR)
+    {
+      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
+	return;
+
+      tree use_lhs = gimple_assign_lhs (use_stmt);
+      if (TREE_CODE (use_lhs) == SSA_NAME
+	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+	return;
+
+      tree use_rhs = gimple_assign_rhs1 (use_stmt);
+      if (lhs != use_rhs)
+	return;
+
+      gimple *g;
+      gimple_stmt_iterator gsi;
+      tree var;
+      int ibit = -1;
+
+      if (rhs_code == BIT_NOT_EXPR)
+	{
+	  g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
+	  if (!g)
+	    return;
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
+	{
+	  tree and_mask;
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      /* MASK must be ~1.  */
+	      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+						   ~HOST_WIDE_INT_1),
+				    mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	    }
+	  else
+	    {
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	      if (!operand_equal_p (and_mask, mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	    }
+	  var = make_ssa_name (TREE_TYPE (use_rhs));
+	  replace_uses_by (use_rhs, var);
+	  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+				   and_mask);
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
+	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	{
+	  gimple *use_nop_stmt;
+	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
+	      || !is_gimple_assign (use_nop_stmt))
+	    return;
+	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
+	  if (rhs_code != BIT_AND_EXPR)
+	    {
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
+		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
+		return;
+	      if (rhs_code == BIT_NOT_EXPR)
+		{
+		  g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
+					      mask);
+		  if (!g)
+		    return;
+		  /* Convert
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		     and convert
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		   */
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  use_stmt = g;
+		  ibit = 0;
+		}
+	      else
+		{
+		  if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
+		    return;
+		  if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
+		    return;
+		  tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+		  if (use_lhs != cmp_rhs1)
+		    return;
+		  tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+		  if (!integer_zerop (cmp_rhs2))
+		    return;
+
+		  tree and_mask;
+
+		  unsigned HOST_WIDE_INT bytes
+		    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
+		  ibit = bytes * BITS_PER_UNIT - 1;
+		  unsigned HOST_WIDE_INT highest
+		    = HOST_WIDE_INT_1U << ibit;
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      /* Get the signed maximum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest - 1);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		    }
+		  else
+		    {
+		      /* Get the signed minimum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		    }
+		  var = make_ssa_name (TREE_TYPE (use_rhs));
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+					   and_mask);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+		  use_stmt = g;
+		  g = gimple_build_assign (use_nop_lhs,
+					   (rhs_code == GE_EXPR
+					    ? EQ_EXPR : NE_EXPR),
+					   var,
+					   build_zero_cst (TREE_TYPE (use_rhs)));
+		  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_remove (&gsi, true);
+		}
+	    }
+	  else
+	    {
+	      tree and_expr = gimple_assign_lhs (use_nop_stmt);
+	      tree res_mask[2];
+	      if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+						     &res_mask[0], NULL))
+		return;
+	      mask = res_mask[1];
+	      if (TREE_CODE (mask) == INTEGER_CST)
+		{
+		  ibit = tree_log2 (mask);
+		  gcc_assert (ibit >= 0);
+		}
+	      else
+		{
+		  g = SSA_NAME_DEF_STMT (mask);
+		  gcc_assert (is_gimple_assign (g));
+		  bit = gimple_assign_rhs2 (g);
+		}
+	      /* Convert
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _2 = (int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _6 = _1 & mask;
+		 _5 = (int) _6;
+		 and convert
+		 _1 = ~mask_7;
+		 _2 = (unsigned int) _1;
+		 _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
+		 _4 = (int) _3;
+		 _5 = _4 & mask_7;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
+		 _12 = _3 & mask_7;
+		 _5 = (int) _12;
+	       */
+	      replace_uses_by (use_lhs, lhs);
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+	      gimple_assign_set_lhs (use_nop_stmt, var);
+	      gsi = gsi_for_stmt (use_stmt);
+	      gsi_remove (&gsi, true);
+	      release_defs (use_stmt);
+	      gsi_remove (gsip, true);
+	      g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+	      gsi = gsi_for_stmt (use_nop_stmt);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	      use_stmt = use_nop_stmt;
+	    }
+	}
+
+      if (!bit)
+	{
+	  if (ibit < 0)
+	    gcc_unreachable ();
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+    }
+
   switch (fn)
     {
     case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,51 +3642,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
     return;
 
-  mask = gimple_call_arg (call, 1);
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
     return;
 
-  if (TREE_CODE (mask) == INTEGER_CST)
-    {
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
-	mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
-      mask = fold_convert (TREE_TYPE (lhs), mask);
-      int ibit = tree_log2 (mask);
-      if (ibit < 0)
-	return;
-      bit = build_int_cst (TREE_TYPE (lhs), ibit);
-    }
-  else if (TREE_CODE (mask) == SSA_NAME)
+  if (!bit)
     {
-      gimple *g = SSA_NAME_DEF_STMT (mask);
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+      if (TREE_CODE (mask) == INTEGER_CST)
 	{
-	  if (!is_gimple_assign (g)
-	      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+	  mask = fold_convert (TREE_TYPE (lhs), mask);
+	  int ibit = tree_log2 (mask);
+	  if (ibit < 0)
 	    return;
-	  mask = gimple_assign_rhs1 (g);
-	  if (TREE_CODE (mask) != SSA_NAME)
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+      else if (TREE_CODE (mask) == SSA_NAME)
+	{
+	  gimple *g = SSA_NAME_DEF_STMT (mask);
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+		return;
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
+		return;
+	      g = SSA_NAME_DEF_STMT (mask);
+	    }
+	  if (!is_gimple_assign (g))
 	    return;
-	  g = SSA_NAME_DEF_STMT (mask);
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR)
+	    {
+	      if (rhs_code != NOP_EXPR)
+		return;
+
+	      /* Handle
+		 _1 = 1 << bit_4(D);
+		 mask_5 = (unsigned int) _1;
+		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
+		 _3 = _2 & mask_5;
+		 */
+	      tree nop_lhs = gimple_assign_lhs (g);
+	      tree nop_rhs = gimple_assign_rhs1 (g);
+	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+		return;
+	      g = SSA_NAME_DEF_STMT (nop_rhs);
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+		return;
+	    }
+	  if (!integer_onep (gimple_assign_rhs1 (g)))
+	    return;
+	  bit = gimple_assign_rhs2 (g);
 	}
-      if (!is_gimple_assign (g)
-	  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
-	  || !integer_onep (gimple_assign_rhs1 (g)))
+      else
 	return;
-      bit = gimple_assign_rhs2 (g);
-    }
-  else
-    return;
 
-  if (gimple_assign_rhs1 (use_stmt) == lhs)
-    {
-      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+      if (gimple_assign_rhs1 (use_stmt) == lhs)
+	{
+	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+	    return;
+	}
+      else if (gimple_assign_rhs2 (use_stmt) != lhs
+	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
+				    mask, 0))
 	return;
     }
-  else if (gimple_assign_rhs2 (use_stmt) != lhs
-	   || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
-    return;
 
   bool use_bool = true;
   bool has_debug_uses = false;
@@ -3434,18 +3800,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	 of the specified bit after the atomic operation (makes only sense
 	 for xor, otherwise the bit content is compile time known),
 	 we need to invert the bit.  */
-      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
-			       BIT_XOR_EXPR, new_lhs,
-			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
-					: mask);
-      new_lhs = gimple_assign_lhs (g);
+      tree mask_convert = mask;
+      gimple_seq stmts = NULL;
+      if (!use_bool)
+	mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
+      new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
+			      use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+				       : mask_convert);
       if (throws)
 	{
-	  gsi_insert_on_edge_immediate (e, g);
-	  gsi = gsi_for_stmt (g);
+	  gsi_insert_seq_on_edge_immediate (e, stmts);
+	  gsi = gsi_for_stmt (gimple_seq_last (stmts));
 	}
       else
-	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
     }
   if (use_bool && has_debug_uses)
     {
-- 
2.18.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v5] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-11-04  1:27       ` [PATCH v5] " liuhongt
@ 2021-11-09 12:48         ` Richard Biener
  2021-11-10  5:20           ` [PATCH] " liuhongt
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-11-09 12:48 UTC (permalink / raw)
  To: liuhongt; +Cc: GCC Patches

On Thu, Nov 4, 2021 at 2:28 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Sorry for the slow reply:

Likewise ;)

> Here is update according to comments
> 1. Define new match function in match.pd.
> 2. Adjust code for below
>    >> +             gsi_remove (gsip, true);
>    >> +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
>    >
>    >instead of building a GENERIC NOP you could use the
>    >
>    >gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
>    >
>    >overload.
>    >You could use
>    >
>    >        gimple_seq stmts = NULL;
>    >        mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
>    >        new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
>    >                                               use_bool ?
>    >build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
>    >
>    >>        if (throws)
>    >>         {
>    >> -         gsi_insert_on_edge_immediate (e, g);
>    >
>    >gsi_insert_seq_on_edge_immediate (e, stmts);
>    >
>    >to simplify this.  The conversion will be only generated if necessary.
>
> Bootstrapped and regtest on x86-64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> Improve integer bit test on __atomic_fetch_[or|and]_* returns
>
> commit adedd5c173388ae505470df152b9cb3947339566
> Author: Jakub Jelinek <jakub@redhat.com>
> Date:   Tue May 3 13:37:25 2016 +0200
>
>     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
>
> optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> with lock bts/btr/btc by turning
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> into
>
>   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
>   _5 = _4;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> into
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
>   _4 = _11 != 0;
>
> But it failed to optimize many equivalent, but slightly different cases:
>
> 1.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _4 = (_Bool) _1;
> 2.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _4 = (_Bool) _1;
> 3.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 4.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 5.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 6.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 7.
>   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 8.
>   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 9.
>   _1 = 1 << bit_4(D);
>   mask_5 = (unsigned int) _1;
>   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
>   _3 = _2 & mask_5;
> 10.
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
>   _4 = (int) _3;
>   _5 = _4 & mask_7;
>
> We make
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> the canonical forms for this optimization and transform cases 1-8 to the
> equivalent canonical form.  For cases 9 and 10, we simply remove the cast
> before __atomic_fetch_or_4/__atomic_fetch_and_4 with
>
>   _1 = 1 << bit_4(D);
>   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
>   _3 = _2 & _1;
>
> and
>
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
>   _6 = _3 & mask_7;
>   _5 = (int) _6;
>
> 2021-11-04  H.J. Lu  <hongjiu.lu@intel.com>
>             Hongtao Liu  <hongtao.liu@intel.com>
> gcc/
>
>         PR middle-end/102566
>         * match.pd (nop_atomic_bit_test_and_p): New match.
>         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
>         (gimple_nop_atomic_bit_test_and_p): New prototype.
>         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
>         different cases to their canonical forms.
>
> gcc/testsuite/
>
>         PR middle-end/102566
>         * g++.target/i386/pr102566-1.C: New test.
>         * g++.target/i386/pr102566-2.C: Likewise.
>         * g++.target/i386/pr102566-3.C: Likewise.
>         * g++.target/i386/pr102566-4.C: Likewise.
>         * g++.target/i386/pr102566-5a.C: Likewise.
>         * g++.target/i386/pr102566-5b.C: Likewise.
>         * g++.target/i386/pr102566-6a.C: Likewise.
>         * g++.target/i386/pr102566-6b.C: Likewise.
>         * gcc.target/i386/pr102566-1a.c: Likewise.
>         * gcc.target/i386/pr102566-1b.c: Likewise.
>         * gcc.target/i386/pr102566-2.c: Likewise.
>         * gcc.target/i386/pr102566-3a.c: Likewise.
>         * gcc.target/i386/pr102566-3b.c: Likewise.
>         * gcc.target/i386/pr102566-4.c: Likewise.
>         * gcc.target/i386/pr102566-5.c: Likewise.
>         * gcc.target/i386/pr102566-6.c: Likewise.
>         * gcc.target/i386/pr102566-7.c: Likewise.
>         * gcc.target/i386/pr102566-8a.c: Likewise.
>         * gcc.target/i386/pr102566-8b.c: Likewise.
>         * gcc.target/i386/pr102566-9a.c: Likewise.
>         * gcc.target/i386/pr102566-9b.c: Likewise.
>         * gcc.target/i386/pr102566-10a.c: Likewise.
>         * gcc.target/i386/pr102566-10b.c: Likewise.
>         * gcc.target/i386/pr102566-11.c: Likewise.
>         * gcc.target/i386/pr102566-12.c: Likewise.
>         * gcc.target/i386/pr102566-13.c: New test.
>         * gcc.target/i386/pr102566-14.c: New test.
> ---
>  gcc/match.pd                                 | 125 +++++
>  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
>  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
>  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-13.c  |  66 +++
>  gcc/testsuite/gcc.target/i386/pr102566-14.c  |  65 +++
>  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 ++++++++
>  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 +++++
>  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
>  gcc/tree-ssa-ccp.c                           | 452 +++++++++++++++++--
>  29 files changed, 1575 insertions(+), 42 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0734c45700c..7888401be02 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -104,6 +104,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (define_operator_list COND_TERNARY
>    IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
>
> +/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_*  */
> +(define_operator_list ATOMIC_FETCH_OR_XOR_N
> +  BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
> +  BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
> +  BUILT_IN_ATOMIC_FETCH_OR_16
> +  BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
> +  BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
> +  BUILT_IN_ATOMIC_FETCH_XOR_16
> +  BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
> +  BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
> +  BUILT_IN_ATOMIC_XOR_FETCH_16)
> +/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_*  */
> +(define_operator_list SYNC_FETCH_OR_XOR_N
> +  BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
> +  BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
> +  BUILT_IN_SYNC_FETCH_AND_OR_16
> +  BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
> +  BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
> +  BUILT_IN_SYNC_FETCH_AND_XOR_16
> +  BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
> +  BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
> +  BUILT_IN_SYNC_XOR_AND_FETCH_16)
> +/* __atomic_fetch_and_*.  */
> +(define_operator_list ATOMIC_FETCH_AND_N
> +  BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
> +  BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
> +  BUILT_IN_ATOMIC_FETCH_AND_16)
> +/* __sync_fetch_and_and_*.  */
> +(define_operator_list SYNC_FETCH_AND_AND_N
> +  BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
> +  BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
> +  BUILT_IN_SYNC_FETCH_AND_AND_16)
> +
>  /* With nop_convert? combine convert? and view_convert? in one pattern
>     plus conditionalize on tree_nop_conversion_p conversions.  */
>  (match (nop_convert @0)
> @@ -3931,6 +3964,98 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (op! @3 @1) (op! @3 @2))))
>  #endif
>
> +#if GIMPLE
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
> +           INTEGER_CST@1)

no need for the :c on the bit_and when the 2nd operand is an
INTEGER_CST (likewise below)

> + (with {
> +        int ibit = tree_log2 (@0);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (single_use (@4)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)

I wonder whether we should handle both of these in the caller to make
this a pure IL structure
match?  At your preference.

> +      && ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
> +           INTEGER_CST@1)
> + (with {
> +        int ibit = tree_log2 (@0);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (single_use (@3)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> +      && ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> +  (nop_convert?@4
> +   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> +  @1)
> + (if (single_use (@4)
> +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> +     && operand_equal_p (@0, @1))))

usually for the equality you'd write

    (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
     @0)

thus use @0 in both @0 and @1 places.  Does that not work here?  (the
nop_atomic_bit_test_and_p
arguments then would be @0 @0).  Likewise below.

> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> +  (nop_convert?@4
> +   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
> +  @1)
> + (if (single_use (@4)
> +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> +     && operand_equal_p (@0, @1))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
> +             INTEGER_CST@1)
> + (with {
> +        tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> +        mask = fold_convert (TREE_TYPE (@4), mask);

it's prefered to use wide_int for this, so

             int ibit = wi::exact_log2 (wi::bit_not (wi::to_wide (@0)));

likewise below.

> +        int ibit = tree_log2 (mask);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (single_use (@3)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> +      && ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c@4
> +  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
> +  INTEGER_CST@1)
> + (with {
> +        tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> +        mask = fold_convert (TREE_TYPE (@4), mask);
> +        int ibit = tree_log2 (mask);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (single_use (@3)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> +      && ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> +  (nop_convert?@3
> +   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
> +   @1)
> + (if (single_use (@3)
> +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> +    && operand_equal_p (@0, @1))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> +  (nop_convert?@3
> +   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
> +   @1)
> + (if (single_use (@3)
> +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> +     && operand_equal_p (@0, @1))))
> +
> +#endif
> +
>  /* (v ? w : 0) ? a : b is just (v & w) ? a : b
>     Currently disabled after pass lvec because ARM understands
>     VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR.  */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..94a66d717cc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> new file mode 100644
> index 00000000000..4f2aea961c2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> new file mode 100644
> index 00000000000..e88921dd155
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> new file mode 100644
> index 00000000000..44d1362ac2e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +tbit0 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +tbit30 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +tbit31 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> new file mode 100644
> index 00000000000..f9595bee2ab
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> new file mode 100644
> index 00000000000..d917b27a918
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> new file mode 100644
> index 00000000000..01d495eda23
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> new file mode 100644
> index 00000000000..adc11fcbf2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> new file mode 100644
> index 00000000000..1c1f86a9659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> new file mode 100644
> index 00000000000..0bf39824ea6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> new file mode 100644
> index 00000000000..2c8f8c4e59a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic int *v)
> +{
> +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic unsigned int *v, int mask)
> +{
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic unsigned int *v, int mask)
> +{
> +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> new file mode 100644
> index 00000000000..4603a77612c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic long *v)
> +{
> +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic long *v, long mask)
> +{
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic long *v, long mask)
> +{
> +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "btr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-13.c b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> new file mode 100644
> index 00000000000..2657a2f62ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define FOO(TYPE,MASK)                                                 \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_or (a, mask) & mask;                       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_xor (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_xor_and_fetch (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_and (a, ~mask) & mask;                     \
> +  }                                                                    \
> +
> +FOO(short, 0);
> +FOO(short, 7);
> +FOO(short, 15);
> +FOO(int, 0);
> +FOO(int, 15);
> +FOO(int, 31);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-14.c b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> new file mode 100644
> index 00000000000..24681c1da18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> @@ -0,0 +1,65 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +typedef long long int64;
> +
> +#define FOO(TYPE,MASK)                                                 \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_or (a, mask) & mask;                       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_xor (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_xor_and_fetch (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_and (a, ~mask) & mask;                     \
> +  }                                                                    \
> +
> +
> +FOO(int64, 0);
> +FOO(int64, 32);
> +FOO(int64, 63);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> +  int t2 = t1 & mask;
> +  return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> +  cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> +    __builtin_abort ();
> +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> +    __builtin_abort ();
> +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> +    __builtin_abort ();
> +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (cnt != 0
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> +    __builtin_abort ();
> +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || cnt != 2)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..00a7c349f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> new file mode 100644
> index 00000000000..d155ed367a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> new file mode 100644
> index 00000000000..2668ccf827c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  unsigned int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> new file mode 100644
> index 00000000000..3dfe55ac683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> new file mode 100644
> index 00000000000..6bc0ae0f320
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +foo0 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +foo1 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +foo2 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> new file mode 100644
> index 00000000000..168e3db78c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> new file mode 100644
> index 00000000000..392da3098e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> new file mode 100644
> index 00000000000..3fa2a3ef043
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> new file mode 100644
> index 00000000000..38ddbdc630f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 70ce6a4d5b8..d14774549b8 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
>    return ret;
>  }
>
> +/* Convert
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _7 = ~_1;
> +   _5 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _8 = _1 & 1;
> +   _5 = _8 == 0;
> +   and convert
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _7 = ~_1;
> +   _4 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _8 = _1 & 1;
> +   _4 = (_Bool) _8;
> +
> +   USE_STMT is the gimple statement which uses the return value of
> +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> +   MASK is the mask passed to __atomic_fetch_or_*.
> + */
> +
> +static gimple *
> +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> +                       tree lhs, tree mask)
> +{
> +  tree and_mask;
> +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +    {
> +      /* MASK must be ~1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                          ~HOST_WIDE_INT_1), mask, 0))
> +       return nullptr;
> +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +    }
> +  else
> +    {
> +      /* MASK must be 1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> +       return nullptr;
> +      and_mask = mask;
> +    }
> +
> +  tree use_lhs = gimple_assign_lhs (use_stmt);
> +
> +  use_operand_p use_p;
> +  gimple *use_not_stmt;
> +
> +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> +      || !is_gimple_assign (use_not_stmt))
> +    return nullptr;
> +
> +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)

  CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt))

Otherwise looks OK.

Thanks,
Richard.

> +    return nullptr;
> +
> +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> +    return nullptr;
> +
> +  gimple_stmt_iterator gsi;
> +  gsi = gsi_for_stmt (use_stmt);
> +  gsi_remove (&gsi, true);
> +  tree var = make_ssa_name (TREE_TYPE (lhs));
> +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> +  lhs = gimple_assign_lhs (use_not_stmt);
> +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> +                                  build_zero_cst (TREE_TYPE (mask)));
> +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_remove (&gsi, true);
> +  return use_stmt;
> +}
> +
> +/* match.pd function to match atomic_bit_test_and pattern which
> +   has nop_convert:
> +     _1 = __atomic_fetch_or_4 (&v, 1, 0);
> +     _2 = (int) _1;
> +     _5 = _2 & 1;
> + */
> +extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
> +                                             tree (*) (tree));
> +
>  /* Optimize
>       mask_2 = 1 << cnt_1;
>       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> @@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    tree lhs = gimple_call_lhs (call);
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  tree mask, bit;
> +  tree mask;
>    optab optab;
>
>    if (!flag_inline_atomics
> @@ -3279,10 +3363,267 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
>        || !single_imm_use (lhs, &use_p, &use_stmt)
>        || !is_gimple_assign (use_stmt)
> -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
>        || !gimple_vdef (call))
>      return;
>
> +  tree bit = nullptr;
> +
> +  mask = gimple_call_arg (call, 1);
> +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> +  if (rhs_code != BIT_AND_EXPR)
> +    {
> +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> +       return;
> +
> +      tree use_lhs = gimple_assign_lhs (use_stmt);
> +      if (TREE_CODE (use_lhs) == SSA_NAME
> +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> +       return;
> +
> +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> +      if (lhs != use_rhs)
> +       return;
> +
> +      gimple *g;
> +      gimple_stmt_iterator gsi;
> +      tree var;
> +      int ibit = -1;
> +
> +      if (rhs_code == BIT_NOT_EXPR)
> +       {
> +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> +         if (!g)
> +           return;
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> +       {
> +         tree and_mask;
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             /* MASK must be ~1.  */
> +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                                  ~HOST_WIDE_INT_1),
> +                                   mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +           }
> +         else
> +           {
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +             if (!operand_equal_p (and_mask, mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +           }
> +         var = make_ssa_name (TREE_TYPE (use_rhs));
> +         replace_uses_by (use_rhs, var);
> +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                  and_mask);
> +         gsi = gsi_for_stmt (use_stmt);
> +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> +       {
> +         gimple *use_nop_stmt;
> +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> +             || !is_gimple_assign (use_nop_stmt))
> +           return;
> +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> +         if (rhs_code != BIT_AND_EXPR)
> +           {
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> +               return;
> +             if (rhs_code == BIT_NOT_EXPR)
> +               {
> +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> +                                             mask);
> +                 if (!g)
> +                   return;
> +                 /* Convert
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                    and convert
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                  */
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 use_stmt = g;
> +                 ibit = 0;
> +               }
> +             else
> +               {
> +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> +                   return;
> +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> +                   return;
> +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +                 if (use_lhs != cmp_rhs1)
> +                   return;
> +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +                 if (!integer_zerop (cmp_rhs2))
> +                   return;
> +
> +                 tree and_mask;
> +
> +                 unsigned HOST_WIDE_INT bytes
> +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> +                 ibit = bytes * BITS_PER_UNIT - 1;
> +                 unsigned HOST_WIDE_INT highest
> +                   = HOST_WIDE_INT_1U << ibit;
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     /* Get the signed maximum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest - 1);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                   }
> +                 else
> +                   {
> +                     /* Get the signed minimum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                   }
> +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                          and_mask);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +                 use_stmt = g;
> +                 g = gimple_build_assign (use_nop_lhs,
> +                                          (rhs_code == GE_EXPR
> +                                           ? EQ_EXPR : NE_EXPR),
> +                                          var,
> +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_remove (&gsi, true);
> +               }
> +           }
> +         else
> +           {
> +             tree and_expr = gimple_assign_lhs (use_nop_stmt);
> +             tree res_mask[2];
> +             if (!gimple_nop_atomic_bit_test_and_p (and_expr,
> +                                                    &res_mask[0], NULL))
> +               return;
> +             mask = res_mask[1];
> +             if (TREE_CODE (mask) == INTEGER_CST)
> +               {
> +                 ibit = tree_log2 (mask);
> +                 gcc_assert (ibit >= 0);
> +               }
> +             else
> +               {
> +                 g = SSA_NAME_DEF_STMT (mask);
> +                 gcc_assert (is_gimple_assign (g));
> +                 bit = gimple_assign_rhs2 (g);
> +               }
> +             /* Convert
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _2 = (int) _1;
> +                _5 = _2 & mask;
> +                to
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _6 = _1 & mask;
> +                _5 = (int) _6;
> +                and convert
> +                _1 = ~mask_7;
> +                _2 = (unsigned int) _1;
> +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> +                _4 = (int) _3;
> +                _5 = _4 & mask_7;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> +                _12 = _3 & mask_7;
> +                _5 = (int) _12;
> +              */
> +             replace_uses_by (use_lhs, lhs);
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> +             gimple_assign_set_lhs (use_nop_stmt, var);
> +             gsi = gsi_for_stmt (use_stmt);
> +             gsi_remove (&gsi, true);
> +             release_defs (use_stmt);
> +             gsi_remove (gsip, true);
> +             g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
> +             gsi = gsi_for_stmt (use_nop_stmt);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +             use_stmt = use_nop_stmt;
> +           }
> +       }
> +
> +      if (!bit)
> +       {
> +         if (ibit < 0)
> +           gcc_unreachable ();
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +    }
> +
>    switch (fn)
>      {
>      case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,51 +3642,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
>      return;
>
> -  mask = gimple_call_arg (call, 1);
>    tree use_lhs = gimple_assign_lhs (use_stmt);
>    if (!use_lhs)
>      return;
>
> -  if (TREE_CODE (mask) == INTEGER_CST)
> -    {
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> -      mask = fold_convert (TREE_TYPE (lhs), mask);
> -      int ibit = tree_log2 (mask);
> -      if (ibit < 0)
> -       return;
> -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> -    }
> -  else if (TREE_CODE (mask) == SSA_NAME)
> +  if (!bit)
>      {
> -      gimple *g = SSA_NAME_DEF_STMT (mask);
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +      if (TREE_CODE (mask) == INTEGER_CST)
>         {
> -         if (!is_gimple_assign (g)
> -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> +         mask = fold_convert (TREE_TYPE (lhs), mask);
> +         int ibit = tree_log2 (mask);
> +         if (ibit < 0)
>             return;
> -         mask = gimple_assign_rhs1 (g);
> -         if (TREE_CODE (mask) != SSA_NAME)
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +      else if (TREE_CODE (mask) == SSA_NAME)
> +       {
> +         gimple *g = SSA_NAME_DEF_STMT (mask);
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +               return;
> +             mask = gimple_assign_rhs1 (g);
> +             if (TREE_CODE (mask) != SSA_NAME)
> +               return;
> +             g = SSA_NAME_DEF_STMT (mask);
> +           }
> +         if (!is_gimple_assign (g))
>             return;
> -         g = SSA_NAME_DEF_STMT (mask);
> +         rhs_code = gimple_assign_rhs_code (g);
> +         if (rhs_code != LSHIFT_EXPR)
> +           {
> +             if (rhs_code != NOP_EXPR)
> +               return;
> +
> +             /* Handle
> +                _1 = 1 << bit_4(D);
> +                mask_5 = (unsigned int) _1;
> +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> +                _3 = _2 & mask_5;
> +                */
> +             tree nop_lhs = gimple_assign_lhs (g);
> +             tree nop_rhs = gimple_assign_rhs1 (g);
> +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> +               return;
> +             g = SSA_NAME_DEF_STMT (nop_rhs);
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> +               return;
> +           }
> +         if (!integer_onep (gimple_assign_rhs1 (g)))
> +           return;
> +         bit = gimple_assign_rhs2 (g);
>         }
> -      if (!is_gimple_assign (g)
> -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> -         || !integer_onep (gimple_assign_rhs1 (g)))
> +      else
>         return;
> -      bit = gimple_assign_rhs2 (g);
> -    }
> -  else
> -    return;
>
> -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> -    {
> -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> +       {
> +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +           return;
> +       }
> +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> +                                   mask, 0))
>         return;
>      }
> -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> -    return;
>
>    bool use_bool = true;
>    bool has_debug_uses = false;
> @@ -3434,18 +3800,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>          of the specified bit after the atomic operation (makes only sense
>          for xor, otherwise the bit content is compile time known),
>          we need to invert the bit.  */
> -      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> -                              BIT_XOR_EXPR, new_lhs,
> -                              use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> -                                       : mask);
> -      new_lhs = gimple_assign_lhs (g);
> +      tree mask_convert = mask;
> +      gimple_seq stmts = NULL;
> +      if (!use_bool)
> +       mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> +      new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> +                             use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> +                                      : mask_convert);
>        if (throws)
>         {
> -         gsi_insert_on_edge_immediate (e, g);
> -         gsi = gsi_for_stmt (g);
> +         gsi_insert_seq_on_edge_immediate (e, stmts);
> +         gsi = gsi_for_stmt (gimple_seq_last (stmts));
>         }
>        else
> -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
>      }
>    if (use_bool && has_debug_uses)
>      {
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-11-09 12:48         ` Richard Biener
@ 2021-11-10  5:20           ` liuhongt
  2021-11-10  8:28             ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: liuhongt @ 2021-11-10  5:20 UTC (permalink / raw)
  To: gcc-patches

> >
> > +#if GIMPLE
> > +(match (nop_atomic_bit_test_and_p @0 @1)
> > + (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
> > +           INTEGER_CST@1)
>
> no need for the :c on the bit_and when the 2nd operand is an

Changed.

> INTEGER_CST (likewise below)
>
> > + (with {
> > +        int ibit = tree_log2 (@0);
> > +        int ibit2 = tree_log2 (@1);
> > +       }
> > +  (if (single_use (@4)
> > +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
>
> I wonder whether we should handle both of these in the caller to make
> this a pure IL structure
> match?  At your preference.
>

Changed.
Add a new parameter to nop_atomic_bit_test_and_p for @4 and test @4 in the caller.

> > +      && ibit == ibit2
> > +      && ibit >= 0))))
> > +
> > +(match (nop_atomic_bit_test_and_p @0 @1)
> > + (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
> > +           INTEGER_CST@1)
> > + (with {
> > +        int ibit = tree_log2 (@0);
> > +        int ibit2 = tree_log2 (@1);
> > +       }
> > +  (if (single_use (@3)
> > +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> > +      && ibit == ibit2
> > +      && ibit >= 0))))
> > +
> > +(match (nop_atomic_bit_test_and_p @0 @1)
> > + (bit_and:c
> > +  (nop_convert?@4
> > +   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> > +  @1)
> > + (if (single_use (@4)
> > +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> > +     && operand_equal_p (@0, @1))))
>
> usually for the equality you'd write
>
>     (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
>      @0)
>
> thus use @0 in both @0 and @1 places.  Does that not work here?  (the
> nop_atomic_bit_test_and_p
> arguments then would be @0 @0).  Likewise below.
>

It works, changed.

> > +
> > +(match (nop_atomic_bit_test_and_p @0 @1)
> > + (bit_and:c
> > +  (nop_convert?@4
> > +   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
> > +  @1)
> > + (if (single_use (@4)
> > +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> > +     && operand_equal_p (@0, @1))))
> > +
> > +(match (nop_atomic_bit_test_and_p @0 @1)
> > + (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
> > +             INTEGER_CST@1)
> > + (with {
> > +        tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> > +        mask = fold_convert (TREE_TYPE (@4), mask);
>
> it's prefered to use wide_int for this, so
>
>              int ibit = wi::exact_log2 (wi::bit_not (wi::to_wide (@0)));
>
> likewise below.

Changed, with a slight adjustment:
int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
    	   		  	    TYPE_PRECISION(type)));

wi::zext is needed when the upper bits are all ones after the bit_not operation.
> > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > +      || !is_gimple_assign (use_not_stmt))
> > +    return nullptr;
> > +
> > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
>
>   CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt))

Changed.

Update patch:


2021-11-04  H.J. Lu  <hongjiu.lu@intel.com>
            Hongtao Liu  <hongtao.liu@intel.com>
gcc/

	PR middle-end/102566
	* match.pd (nop_atomic_bit_test_and_p): New match.
	* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
	(gimple_nop_atomic_bit_test_and_p): New prototype.
	(optimize_atomic_bit_test_and): Transform equivalent, but slightly
	different cases to their canonical forms.

gcc/testsuite/

	PR middle-end/102566
	* g++.target/i386/pr102566-1.C: New test.
	* g++.target/i386/pr102566-2.C: Likewise.
	* g++.target/i386/pr102566-3.C: Likewise.
	* g++.target/i386/pr102566-4.C: Likewise.
	* g++.target/i386/pr102566-5a.C: Likewise.
	* g++.target/i386/pr102566-5b.C: Likewise.
	* g++.target/i386/pr102566-6a.C: Likewise.
	* g++.target/i386/pr102566-6b.C: Likewise.
	* gcc.target/i386/pr102566-1a.c: Likewise.
	* gcc.target/i386/pr102566-1b.c: Likewise.
	* gcc.target/i386/pr102566-2.c: Likewise.
	* gcc.target/i386/pr102566-3a.c: Likewise.
	* gcc.target/i386/pr102566-3b.c: Likewise.
	* gcc.target/i386/pr102566-4.c: Likewise.
	* gcc.target/i386/pr102566-5.c: Likewise.
	* gcc.target/i386/pr102566-6.c: Likewise.
	* gcc.target/i386/pr102566-7.c: Likewise.
	* gcc.target/i386/pr102566-8a.c: Likewise.
	* gcc.target/i386/pr102566-8b.c: Likewise.
	* gcc.target/i386/pr102566-9a.c: Likewise.
	* gcc.target/i386/pr102566-9b.c: Likewise.
	* gcc.target/i386/pr102566-10a.c: Likewise.
	* gcc.target/i386/pr102566-10b.c: Likewise.
	* gcc.target/i386/pr102566-11.c: Likewise.
	* gcc.target/i386/pr102566-12.c: Likewise.
	* gcc.target/i386/pr102566-13.c: New test.
	* gcc.target/i386/pr102566-14.c: New test.
---
 gcc/match.pd                                 | 103 +++++
 gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
 gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
 gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-13.c  |  66 +++
 gcc/testsuite/gcc.target/i386/pr102566-14.c  |  65 +++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 ++++++++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 +++++
 gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
 gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
 gcc/tree-ssa-ccp.c                           | 456 +++++++++++++++++--
 29 files changed, 1557 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 986b052bc93..77ea2780c95 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -105,6 +105,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (define_operator_list COND_TERNARY
   IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
 
+/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_*  */
+(define_operator_list ATOMIC_FETCH_OR_XOR_N
+  BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
+  BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
+  BUILT_IN_ATOMIC_FETCH_OR_16
+  BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
+  BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
+  BUILT_IN_ATOMIC_FETCH_XOR_16
+  BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
+  BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
+  BUILT_IN_ATOMIC_XOR_FETCH_16)
+/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_*  */
+(define_operator_list SYNC_FETCH_OR_XOR_N
+  BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
+  BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
+  BUILT_IN_SYNC_FETCH_AND_OR_16
+  BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
+  BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
+  BUILT_IN_SYNC_FETCH_AND_XOR_16
+  BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
+  BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
+  BUILT_IN_SYNC_XOR_AND_FETCH_16)
+/* __atomic_fetch_and_*.  */
+(define_operator_list ATOMIC_FETCH_AND_N
+  BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
+  BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
+  BUILT_IN_ATOMIC_FETCH_AND_16)
+/* __sync_fetch_and_and_*.  */
+(define_operator_list SYNC_FETCH_AND_AND_N
+  BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
+  BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
+  BUILT_IN_SYNC_FETCH_AND_AND_16)
+
 /* With nop_convert? combine convert? and view_convert? in one pattern
    plus conditionalize on tree_nop_conversion_p conversions.  */
 (match (nop_convert @0)
@@ -3976,6 +4009,76 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (op! @3 @1) (op! @3 @2))))
 #endif
 
+#if GIMPLE
+(match (nop_atomic_bit_test_and_p @0 @1 @4)
+ (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+	   INTEGER_CST@1)
+ (with {
+	 int ibit = tree_log2 (@0);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1 @3)
+ (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+	  INTEGER_CST@1)
+ (with {
+	 int ibit = tree_log2 (@0);
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @0 @4)
+ (bit_and:c
+  (nop_convert?@4
+   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
+  @0))
+
+(match (nop_atomic_bit_test_and_p @0 @0 @4)
+ (bit_and:c
+  (nop_convert?@4
+   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
+  @0))
+
+(match (nop_atomic_bit_test_and_p @0 @1 @3)
+ (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+	    INTEGER_CST@1)
+ (with {
+	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
+					      TYPE_PRECISION(type)));
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1 @3)
+ (bit_and@4
+  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+  INTEGER_CST@1)
+ (with {
+	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
+					      TYPE_PRECISION(type)));
+	 int ibit2 = tree_log2 (@1);
+       }
+  (if (ibit == ibit2
+      && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @0 @3)
+ (bit_and:c
+  (nop_convert?@3
+   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
+   @0))
+
+(match (nop_atomic_bit_test_and_p @0 @0 @3)
+ (bit_and:c
+  (nop_convert?@3
+   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
+   @0))
+
+#endif
+
 /* (v ? w : 0) ? a : b is just (v & w) ? a : b
    Currently disabled after pass lvec because ARM understands
    VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR.  */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
new file mode 100644
index 00000000000..94a66d717cc
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
new file mode 100644
index 00000000000..4f2aea961c2
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
new file mode 100644
index 00000000000..e88921dd155
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
new file mode 100644
index 00000000000..44d1362ac2e
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
@@ -0,0 +1,29 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+tbit0 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
+}
+
+bool
+tbit30 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
+}
+
+bool
+tbit31 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
new file mode 100644
index 00000000000..f9595bee2ab
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
new file mode 100644
index 00000000000..d917b27a918
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
new file mode 100644
index 00000000000..01d495eda23
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
new file mode 100644
index 00000000000..adc11fcbf2d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
new file mode 100644
index 00000000000..1c1f86a9659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
new file mode 100644
index 00000000000..0bf39824ea6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
new file mode 100644
index 00000000000..2c8f8c4e59a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic int *v)
+{
+  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic unsigned int *v, int mask)
+{
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic unsigned int *v, int mask)
+{
+  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
new file mode 100644
index 00000000000..4603a77612c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic long *v)
+{
+  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic long *v, long mask)
+{
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic long *v, long mask)
+{
+  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "btr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-13.c b/gcc/testsuite/gcc.target/i386/pr102566-13.c
new file mode 100644
index 00000000000..2657a2f62ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-13.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(TYPE,MASK)							\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(short, 0);
+FOO(short, 7);
+FOO(short, 15);
+FOO(int, 0);
+FOO(int, 15);
+FOO(int, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-14.c b/gcc/testsuite/gcc.target/i386/pr102566-14.c
new file mode 100644
index 00000000000..24681c1da18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-14.c
@@ -0,0 +1,65 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(TYPE,MASK)							\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) TYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+
+FOO(int64, 0);
+FOO(int64, 32);
+FOO(int64, 63);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
new file mode 100644
index 00000000000..a915de354e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+  int t2 = t1 & mask;
+  return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
new file mode 100644
index 00000000000..c4dab8135c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+    __builtin_abort ();
+  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+    __builtin_abort ();
+  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+    __builtin_abort ();
+  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+    __builtin_abort ();
+  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (cnt != 0
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+    __builtin_abort ();
+  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || cnt != 2)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
new file mode 100644
index 00000000000..00a7c349f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
new file mode 100644
index 00000000000..d155ed367a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
new file mode 100644
index 00000000000..2668ccf827c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  unsigned int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
new file mode 100644
index 00000000000..3dfe55ac683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
new file mode 100644
index 00000000000..6bc0ae0f320
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+foo0 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
+}
+
+bool
+foo1 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
+}
+
+bool
+foo2 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
new file mode 100644
index 00000000000..168e3db78c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
new file mode 100644
index 00000000000..392da3098e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
new file mode 100644
index 00000000000..3fa2a3ef043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
new file mode 100644
index 00000000000..38ddbdc630f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 60ae5e6601f..0f79e9f05bd 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
   return ret;
 }
 
+/* Convert
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _7 = ~_1;
+   _5 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _8 = _1 & 1;
+   _5 = _8 == 0;
+   and convert
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _7 = ~_1;
+   _4 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _8 = _1 & 1;
+   _4 = (_Bool) _8;
+
+   USE_STMT is the gimple statement which uses the return value of
+   __atomic_fetch_or_*/__atomic_fetch_and_*.  LHS is the return value
+   of the atomic builtin.  MASK is the mask passed to the builtin.
+ */
+
+static gimple *
+convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
+			tree lhs, tree mask)
+{
+  tree and_mask;
+  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+    {
+      /* MASK must be ~1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+					   ~HOST_WIDE_INT_1), mask, 0))
+	return nullptr;
+      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+    }
+  else
+    {
+      /* MASK must be 1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
+	return nullptr;
+      and_mask = mask;
+    }
+
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+
+  use_operand_p use_p;
+  gimple *use_not_stmt;
+
+  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
+      || !is_gimple_assign (use_not_stmt))
+    return nullptr;
+
+  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt)))
+    return nullptr;
+
+  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
+  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
+    return nullptr;
+
+  gimple_stmt_iterator gsi;
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  tree var = make_ssa_name (TREE_TYPE (lhs));
+  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
+  lhs = gimple_assign_lhs (use_not_stmt);
+  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
+				   build_zero_cst (TREE_TYPE (mask)));
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_remove (&gsi, true);
+  return use_stmt;
+}
+
+/* match.pd function to match the atomic_bit_test_and pattern which
+   has nop_convert:
+     _1 = __atomic_fetch_or_4 (&v, 1, 0);
+     _2 = (int) _1;
+     _5 = _2 & 1;
+ */
+extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
+					      tree (*) (tree));
+
 /* Optimize
      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
@@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   tree lhs = gimple_call_lhs (call);
   use_operand_p use_p;
   gimple *use_stmt;
-  tree mask, bit;
+  tree mask;
   optab optab;
 
   if (!flag_inline_atomics
@@ -3279,10 +3363,271 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
-      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
       || !gimple_vdef (call))
     return;
 
+  tree bit = nullptr;
+
+  mask = gimple_call_arg (call, 1);
+  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+  if (rhs_code != BIT_AND_EXPR)
+    {
+      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
+	return;
+
+      tree use_lhs = gimple_assign_lhs (use_stmt);
+      if (TREE_CODE (use_lhs) == SSA_NAME
+	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+	return;
+
+      tree use_rhs = gimple_assign_rhs1 (use_stmt);
+      if (lhs != use_rhs)
+	return;
+
+      gimple *g;
+      gimple_stmt_iterator gsi;
+      tree var;
+      int ibit = -1;
+
+      if (rhs_code == BIT_NOT_EXPR)
+	{
+	  g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
+	  if (!g)
+	    return;
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
+	{
+	  tree and_mask;
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      /* MASK must be ~1.  */
+	      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+						   ~HOST_WIDE_INT_1),
+				    mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	    }
+	  else
+	    {
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	      if (!operand_equal_p (and_mask, mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	    }
+	  var = make_ssa_name (TREE_TYPE (use_rhs));
+	  replace_uses_by (use_rhs, var);
+	  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+				   and_mask);
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
+	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	{
+	  gimple *use_nop_stmt;
+	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
+	      || !is_gimple_assign (use_nop_stmt))
+	    return;
+	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
+	  if (rhs_code != BIT_AND_EXPR)
+	    {
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
+		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
+		return;
+	      if (rhs_code == BIT_NOT_EXPR)
+		{
+		  g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
+					      mask);
+		  if (!g)
+		    return;
+		  /* Convert
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		     and convert
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		   */
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  use_stmt = g;
+		  ibit = 0;
+		}
+	      else
+		{
+		  if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
+		    return;
+		  if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
+		    return;
+		  tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+		  if (use_lhs != cmp_rhs1)
+		    return;
+		  tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+		  if (!integer_zerop (cmp_rhs2))
+		    return;
+
+		  tree and_mask;
+
+		  unsigned HOST_WIDE_INT bytes
+		    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
+		  ibit = bytes * BITS_PER_UNIT - 1;
+		  unsigned HOST_WIDE_INT highest
+		    = HOST_WIDE_INT_1U << ibit;
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      /* Get the signed maximum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest - 1);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		    }
+		  else
+		    {
+		      /* Get the signed minimum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		    }
+		  var = make_ssa_name (TREE_TYPE (use_rhs));
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+					   and_mask);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+		  use_stmt = g;
+		  g = gimple_build_assign (use_nop_lhs,
+					   (rhs_code == GE_EXPR
+					    ? EQ_EXPR : NE_EXPR),
+					   var,
+					   build_zero_cst (TREE_TYPE (use_rhs)));
+		  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_remove (&gsi, true);
+		}
+	    }
+	  else
+	    {
+	      tree and_expr = gimple_assign_lhs (use_nop_stmt);
+	      tree match_op[3];
+	      gimple *g;
+	      if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+						     &match_op[0], NULL)
+		  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
+		  || !single_imm_use (match_op[2], &use_p, &g)
+		  || !is_gimple_assign (g))
+		return;
+	      mask = match_op[1];
+	      if (TREE_CODE (mask) == INTEGER_CST)
+		{
+		  ibit = tree_log2 (mask);
+		  gcc_assert (ibit >= 0);
+		}
+	      else
+		{
+		  g = SSA_NAME_DEF_STMT (mask);
+		  gcc_assert (is_gimple_assign (g));
+		  bit = gimple_assign_rhs2 (g);
+		}
+	      /* Convert
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _2 = (int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _6 = _1 & mask;
+		 _5 = (int) _6;
+		 and convert
+		 _1 = ~mask_7;
+		 _2 = (unsigned int) _1;
+		 _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
+		 _4 = (int) _3;
+		 _5 = _4 & mask_7;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
+		 _12 = _3 & mask_7;
+		 _5 = (int) _12;
+	       */
+	      replace_uses_by (use_lhs, lhs);
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+	      gimple_assign_set_lhs (use_nop_stmt, var);
+	      gsi = gsi_for_stmt (use_stmt);
+	      gsi_remove (&gsi, true);
+	      release_defs (use_stmt);
+	      gsi_remove (gsip, true);
+	      g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+	      gsi = gsi_for_stmt (use_nop_stmt);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	      use_stmt = use_nop_stmt;
+	    }
+	}
+
+      if (!bit)
+	{
+	  if (ibit < 0)
+	    gcc_unreachable ();
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+    }
+
   switch (fn)
     {
     case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,51 +3646,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
     return;
 
-  mask = gimple_call_arg (call, 1);
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
     return;
 
-  if (TREE_CODE (mask) == INTEGER_CST)
-    {
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
-	mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
-      mask = fold_convert (TREE_TYPE (lhs), mask);
-      int ibit = tree_log2 (mask);
-      if (ibit < 0)
-	return;
-      bit = build_int_cst (TREE_TYPE (lhs), ibit);
-    }
-  else if (TREE_CODE (mask) == SSA_NAME)
+  if (!bit)
     {
-      gimple *g = SSA_NAME_DEF_STMT (mask);
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+      if (TREE_CODE (mask) == INTEGER_CST)
 	{
-	  if (!is_gimple_assign (g)
-	      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+	  mask = fold_convert (TREE_TYPE (lhs), mask);
+	  int ibit = tree_log2 (mask);
+	  if (ibit < 0)
 	    return;
-	  mask = gimple_assign_rhs1 (g);
-	  if (TREE_CODE (mask) != SSA_NAME)
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+      else if (TREE_CODE (mask) == SSA_NAME)
+	{
+	  gimple *g = SSA_NAME_DEF_STMT (mask);
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+		return;
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
+		return;
+	      g = SSA_NAME_DEF_STMT (mask);
+	    }
+	  if (!is_gimple_assign (g))
 	    return;
-	  g = SSA_NAME_DEF_STMT (mask);
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR)
+	    {
+	      if (rhs_code != NOP_EXPR)
+		return;
+
+	      /* Handle
+		 _1 = 1 << bit_4(D);
+		 mask_5 = (unsigned int) _1;
+		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
+		 _3 = _2 & mask_5;
+		 */
+	      tree nop_lhs = gimple_assign_lhs (g);
+	      tree nop_rhs = gimple_assign_rhs1 (g);
+	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+		return;
+	      g = SSA_NAME_DEF_STMT (nop_rhs);
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+		return;
+	    }
+	  if (!integer_onep (gimple_assign_rhs1 (g)))
+	    return;
+	  bit = gimple_assign_rhs2 (g);
 	}
-      if (!is_gimple_assign (g)
-	  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
-	  || !integer_onep (gimple_assign_rhs1 (g)))
+      else
 	return;
-      bit = gimple_assign_rhs2 (g);
-    }
-  else
-    return;
 
-  if (gimple_assign_rhs1 (use_stmt) == lhs)
-    {
-      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+      if (gimple_assign_rhs1 (use_stmt) == lhs)
+	{
+	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+	    return;
+	}
+      else if (gimple_assign_rhs2 (use_stmt) != lhs
+	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
+				    mask, 0))
 	return;
     }
-  else if (gimple_assign_rhs2 (use_stmt) != lhs
-	   || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
-    return;
 
   bool use_bool = true;
   bool has_debug_uses = false;
@@ -3434,18 +3804,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	 of the specified bit after the atomic operation (makes only sense
 	 for xor, otherwise the bit content is compile time known),
 	 we need to invert the bit.  */
-      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
-			       BIT_XOR_EXPR, new_lhs,
-			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
-					: mask);
-      new_lhs = gimple_assign_lhs (g);
+      tree mask_convert = mask;
+      gimple_seq stmts = NULL;
+      if (!use_bool)
+	mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
+      new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
+			      use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+				       : mask_convert);
       if (throws)
 	{
-	  gsi_insert_on_edge_immediate (e, g);
-	  gsi = gsi_for_stmt (g);
+	  gsi_insert_seq_on_edge_immediate (e, stmts);
+	  gsi = gsi_for_stmt (gimple_seq_last (stmts));
 	}
       else
-	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
     }
   if (use_bool && has_debug_uses)
     {
-- 
2.18.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Improve integer bit test on __atomic_fetch_[or|and]_* returns
  2021-11-10  5:20           ` [PATCH] " liuhongt
@ 2021-11-10  8:28             ` Richard Biener
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Biener @ 2021-11-10  8:28 UTC (permalink / raw)
  To: liuhongt; +Cc: GCC Patches

On Wed, Nov 10, 2021 at 6:21 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> > >
> > > +#if GIMPLE
> > > +(match (nop_atomic_bit_test_and_p @0 @1)
> > > + (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
> > > +           INTEGER_CST@1)
> >
> > no need for the :c on the bit_and when the 2nd operand is an
>
> Changed.
>
> > INTEGER_CST (likewise below)
> >
> > > + (with {
> > > +        int ibit = tree_log2 (@0);
> > > +        int ibit2 = tree_log2 (@1);
> > > +       }
> > > +  (if (single_use (@4)
> > > +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> >
> > I wonder whether we should handle both of these in the caller to make
> > this a pure IL structure
> > match?  At your preference.
> >
>
> Changed.
> Add a new parameter to nop_atomic_bit_test_and_p for @4 and test @4 in the caller.
>
> > > +      && ibit == ibit2
> > > +      && ibit >= 0))))
> > > +
> > > +(match (nop_atomic_bit_test_and_p @0 @1)
> > > + (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
> > > +           INTEGER_CST@1)
> > > + (with {
> > > +        int ibit = tree_log2 (@0);
> > > +        int ibit2 = tree_log2 (@1);
> > > +       }
> > > +  (if (single_use (@3)
> > > +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> > > +      && ibit == ibit2
> > > +      && ibit >= 0))))
> > > +
> > > +(match (nop_atomic_bit_test_and_p @0 @1)
> > > + (bit_and:c
> > > +  (nop_convert?@4
> > > +   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> > > +  @1)
> > > + (if (single_use (@4)
> > > +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> > > +     && operand_equal_p (@0, @1))))
> >
> > usually for the equality you'd write
> >
> >     (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> >      @0)
> >
> > thus use @0 in both @0 and @1 places.  Does that not work here?  (the
> > nop_atomic_bit_test_and_p
> > arguments then would be @0 @0).  Likewise below.
> >
>
> It works, changed.
>
> > > +
> > > +(match (nop_atomic_bit_test_and_p @0 @1)
> > > + (bit_and:c
> > > +  (nop_convert?@4
> > > +   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
> > > +  @1)
> > > + (if (single_use (@4)
> > > +     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> > > +     && operand_equal_p (@0, @1))))
> > > +
> > > +(match (nop_atomic_bit_test_and_p @0 @1)
> > > + (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
> > > +             INTEGER_CST@1)
> > > + (with {
> > > +        tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> > > +        mask = fold_convert (TREE_TYPE (@4), mask);
> >
> > it's prefered to use wide_int for this, so
> >
> >              int ibit = wi::exact_log2 (wi::bit_not (wi::to_wide (@0)));
> >
> > likewise below.
>
> Changed, with a bit adjustment
> int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
>                                     TYPE_PRECISION(type)));
>
> wi::zext is needed when upper bits are all ones after bit_not operation.
> > > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > > +      || !is_gimple_assign (use_not_stmt))
> > > +    return nullptr;
> > > +
> > > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> >
> >   CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt))
>
> Changed.
>
> Update patch:

OK.

Thanks,
Richard.

>
> 2021-11-04  H.J. Lu  <hongjiu.lu@intel.com>
>             Hongtao Liu  <hongtao.liu@intel.com>
> gcc/
>
>         PR middle-end/102566
>         * match.pd (nop_atomic_bit_test_and_p): New match.
>         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
>         (gimple_nop_atomic_bit_test_and_p): New prototype.
>         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
>         different cases to their canonical forms.
>
> gcc/testsuite/
>
>         PR middle-end/102566
>         * g++.target/i386/pr102566-1.C: New test.
>         * g++.target/i386/pr102566-2.C: Likewise.
>         * g++.target/i386/pr102566-3.C: Likewise.
>         * g++.target/i386/pr102566-4.C: Likewise.
>         * g++.target/i386/pr102566-5a.C: Likewise.
>         * g++.target/i386/pr102566-5b.C: Likewise.
>         * g++.target/i386/pr102566-6a.C: Likewise.
>         * g++.target/i386/pr102566-6b.C: Likewise.
>         * gcc.target/i386/pr102566-1a.c: Likewise.
>         * gcc.target/i386/pr102566-1b.c: Likewise.
>         * gcc.target/i386/pr102566-2.c: Likewise.
>         * gcc.target/i386/pr102566-3a.c: Likewise.
>         * gcc.target/i386/pr102566-3b.c: Likewise.
>         * gcc.target/i386/pr102566-4.c: Likewise.
>         * gcc.target/i386/pr102566-5.c: Likewise.
>         * gcc.target/i386/pr102566-6.c: Likewise.
>         * gcc.target/i386/pr102566-7.c: Likewise.
>         * gcc.target/i386/pr102566-8a.c: Likewise.
>         * gcc.target/i386/pr102566-8b.c: Likewise.
>         * gcc.target/i386/pr102566-9a.c: Likewise.
>         * gcc.target/i386/pr102566-9b.c: Likewise.
>         * gcc.target/i386/pr102566-10a.c: Likewise.
>         * gcc.target/i386/pr102566-10b.c: Likewise.
>         * gcc.target/i386/pr102566-11.c: Likewise.
>         * gcc.target/i386/pr102566-12.c: Likewise.
>         * gcc.target/i386/pr102566-13.c: New test.
>         * gcc.target/i386/pr102566-14.c: New test.
> ---
>  gcc/match.pd                                 | 103 +++++
>  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
>  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
>  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-13.c  |  66 +++
>  gcc/testsuite/gcc.target/i386/pr102566-14.c  |  65 +++
>  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 ++++++++
>  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 +++++
>  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
>  gcc/tree-ssa-ccp.c                           | 456 +++++++++++++++++--
>  29 files changed, 1557 insertions(+), 42 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 986b052bc93..77ea2780c95 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -105,6 +105,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (define_operator_list COND_TERNARY
>    IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
>
> +/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_*  */
> +(define_operator_list ATOMIC_FETCH_OR_XOR_N
> +  BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
> +  BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
> +  BUILT_IN_ATOMIC_FETCH_OR_16
> +  BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
> +  BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
> +  BUILT_IN_ATOMIC_FETCH_XOR_16
> +  BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
> +  BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
> +  BUILT_IN_ATOMIC_XOR_FETCH_16)
> +/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_*  */
> +(define_operator_list SYNC_FETCH_OR_XOR_N
> +  BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
> +  BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
> +  BUILT_IN_SYNC_FETCH_AND_OR_16
> +  BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
> +  BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
> +  BUILT_IN_SYNC_FETCH_AND_XOR_16
> +  BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
> +  BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
> +  BUILT_IN_SYNC_XOR_AND_FETCH_16)
> +/* __atomic_fetch_and_*.  */
> +(define_operator_list ATOMIC_FETCH_AND_N
> +  BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
> +  BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
> +  BUILT_IN_ATOMIC_FETCH_AND_16)
> +/* __sync_fetch_and_and_*.  */
> +(define_operator_list SYNC_FETCH_AND_AND_N
> +  BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
> +  BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
> +  BUILT_IN_SYNC_FETCH_AND_AND_16)
> +
>  /* With nop_convert? combine convert? and view_convert? in one pattern
>     plus conditionalize on tree_nop_conversion_p conversions.  */
>  (match (nop_convert @0)
> @@ -3976,6 +4009,76 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (op! @3 @1) (op! @3 @2))))
>  #endif
>
> +#if GIMPLE
> +(match (nop_atomic_bit_test_and_p @0 @1 @4)
> + (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
> +          INTEGER_CST@1)
> + (with {
> +        int ibit = tree_log2 (@0);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1 @3)
> + (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
> +         INTEGER_CST@1)
> + (with {
> +        int ibit = tree_log2 (@0);
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @0 @4)
> + (bit_and:c
> +  (nop_convert?@4
> +   (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> +  @0))
> +
> +(match (nop_atomic_bit_test_and_p @0 @0 @4)
> + (bit_and:c
> +  (nop_convert?@4
> +   (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
> +  @0))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1 @3)
> + (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
> +           INTEGER_CST@1)
> + (with {
> +        int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
> +                                             TYPE_PRECISION(type)));
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1 @3)
> + (bit_and@4
> +  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
> +  INTEGER_CST@1)
> + (with {
> +        int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
> +                                             TYPE_PRECISION(type)));
> +        int ibit2 = tree_log2 (@1);
> +       }
> +  (if (ibit == ibit2
> +      && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @0 @3)
> + (bit_and:c
> +  (nop_convert?@3
> +   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
> +   @0))
> +
> +(match (nop_atomic_bit_test_and_p @0 @0 @3)
> + (bit_and:c
> +  (nop_convert?@3
> +   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
> +   @0))
> +
> +#endif
> +
>  /* (v ? w : 0) ? a : b is just (v & w) ? a : b
>     Currently disabled after pass lvec because ARM understands
>     VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR.  */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..94a66d717cc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> new file mode 100644
> index 00000000000..4f2aea961c2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> new file mode 100644
> index 00000000000..e88921dd155
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> new file mode 100644
> index 00000000000..44d1362ac2e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +tbit0 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +tbit30 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +tbit31 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> new file mode 100644
> index 00000000000..f9595bee2ab
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> new file mode 100644
> index 00000000000..d917b27a918
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> new file mode 100644
> index 00000000000..01d495eda23
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> new file mode 100644
> index 00000000000..adc11fcbf2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> new file mode 100644
> index 00000000000..1c1f86a9659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> new file mode 100644
> index 00000000000..0bf39824ea6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> new file mode 100644
> index 00000000000..2c8f8c4e59a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic int *v)
> +{
> +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic unsigned int *v, int mask)
> +{
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic unsigned int *v, int mask)
> +{
> +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> new file mode 100644
> index 00000000000..4603a77612c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic long *v)
> +{
> +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic long *v, long mask)
> +{
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic long *v, long mask)
> +{
> +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "btr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-13.c b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> new file mode 100644
> index 00000000000..2657a2f62ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define FOO(TYPE,MASK)                                                 \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_or (a, mask) & mask;                       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_xor (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_xor_and_fetch (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1 << MASK;                                             \
> +    return __sync_fetch_and_and (a, ~mask) & mask;                     \
> +  }                                                                    \
> +
> +FOO(short, 0);
> +FOO(short, 7);
> +FOO(short, 15);
> +FOO(int, 0);
> +FOO(int, 15);
> +FOO(int, 31);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-14.c b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> new file mode 100644
> index 00000000000..24681c1da18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> @@ -0,0 +1,65 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +typedef long long int64;
> +
> +#define FOO(TYPE,MASK)                                                 \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_or (a, mask) & mask;                       \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_xor (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_xor_and_fetch (a, mask) & mask;                      \
> +  }                                                                    \
> +  __attribute__((noinline,noclone)) TYPE                               \
> +  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
> +  {                                                                    \
> +    TYPE mask = 1ll << MASK;                                           \
> +    return __sync_fetch_and_and (a, ~mask) & mask;                     \
> +  }                                                                    \
> +
> +
> +FOO(int64, 0);
> +FOO(int64, 32);
> +FOO(int64, 63);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> +  int t2 = t1 & mask;
> +  return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> +  cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> +    __builtin_abort ();
> +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> +    __builtin_abort ();
> +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> +    __builtin_abort ();
> +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (cnt != 0
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> +    __builtin_abort ();
> +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || cnt != 2)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..00a7c349f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> new file mode 100644
> index 00000000000..d155ed367a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> new file mode 100644
> index 00000000000..2668ccf827c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  unsigned int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> new file mode 100644
> index 00000000000..3dfe55ac683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> new file mode 100644
> index 00000000000..6bc0ae0f320
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +foo0 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +foo1 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +foo2 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> new file mode 100644
> index 00000000000..168e3db78c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> new file mode 100644
> index 00000000000..392da3098e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> new file mode 100644
> index 00000000000..3fa2a3ef043
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> new file mode 100644
> index 00000000000..38ddbdc630f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 60ae5e6601f..0f79e9f05bd 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
>    return ret;
>  }
>
> +/* Convert
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _7 = ~_1;
> +   _5 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _8 = _1 & 1;
> +   _5 = _8 == 0;
> +   and convert
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _7 = ~_1;
> +   _4 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _8 = _1 & 1;
> +   _4 = (_Bool) _8;
> +
> +   USE_STMT is the gimple statement which uses the return value of
> +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> +   MASK is the mask passed to __atomic_fetch_or_*.
> + */
> +
> +static gimple *
> +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> +                       tree lhs, tree mask)
> +{
> +  tree and_mask;
> +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +    {
> +      /* MASK must be ~1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                          ~HOST_WIDE_INT_1), mask, 0))
> +       return nullptr;
> +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +    }
> +  else
> +    {
> +      /* MASK must be 1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> +       return nullptr;
> +      and_mask = mask;
> +    }
> +
> +  tree use_lhs = gimple_assign_lhs (use_stmt);
> +
> +  use_operand_p use_p;
> +  gimple *use_not_stmt;
> +
> +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> +      || !is_gimple_assign (use_not_stmt))
> +    return nullptr;
> +
> +  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt)))
> +    return nullptr;
> +
> +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> +    return nullptr;
> +
> +  gimple_stmt_iterator gsi;
> +  gsi = gsi_for_stmt (use_stmt);
> +  gsi_remove (&gsi, true);
> +  tree var = make_ssa_name (TREE_TYPE (lhs));
> +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> +  lhs = gimple_assign_lhs (use_not_stmt);
> +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> +                                  build_zero_cst (TREE_TYPE (mask)));
> +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_remove (&gsi, true);
> +  return use_stmt;
> +}
> +
> +/* match.pd function to match atomic_bit_test_and pattern which
> +   has nop_convert:
> +     _1 = __atomic_fetch_or_4 (&v, 1, 0);
> +     _2 = (int) _1;
> +     _5 = _2 & 1;
> + */
> +extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
> +                                             tree (*) (tree));
> +
>  /* Optimize
>       mask_2 = 1 << cnt_1;
>       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> @@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    tree lhs = gimple_call_lhs (call);
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  tree mask, bit;
> +  tree mask;
>    optab optab;
>
>    if (!flag_inline_atomics
> @@ -3279,10 +3363,271 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
>        || !single_imm_use (lhs, &use_p, &use_stmt)
>        || !is_gimple_assign (use_stmt)
> -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
>        || !gimple_vdef (call))
>      return;
>
> +  tree bit = nullptr;
> +
> +  mask = gimple_call_arg (call, 1);
> +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> +  if (rhs_code != BIT_AND_EXPR)
> +    {
> +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> +       return;
> +
> +      tree use_lhs = gimple_assign_lhs (use_stmt);
> +      if (TREE_CODE (use_lhs) == SSA_NAME
> +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> +       return;
> +
> +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> +      if (lhs != use_rhs)
> +       return;
> +
> +      gimple *g;
> +      gimple_stmt_iterator gsi;
> +      tree var;
> +      int ibit = -1;
> +
> +      if (rhs_code == BIT_NOT_EXPR)
> +       {
> +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> +         if (!g)
> +           return;
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> +       {
> +         tree and_mask;
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             /* MASK must be ~1.  */
> +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                                  ~HOST_WIDE_INT_1),
> +                                   mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +           }
> +         else
> +           {
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +             if (!operand_equal_p (and_mask, mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +           }
> +         var = make_ssa_name (TREE_TYPE (use_rhs));
> +         replace_uses_by (use_rhs, var);
> +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                  and_mask);
> +         gsi = gsi_for_stmt (use_stmt);
> +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> +       {
> +         gimple *use_nop_stmt;
> +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> +             || !is_gimple_assign (use_nop_stmt))
> +           return;
> +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> +         if (rhs_code != BIT_AND_EXPR)
> +           {
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> +               return;
> +             if (rhs_code == BIT_NOT_EXPR)
> +               {
> +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> +                                             mask);
> +                 if (!g)
> +                   return;
> +                 /* Convert
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                    and convert
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                  */
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 use_stmt = g;
> +                 ibit = 0;
> +               }
> +             else
> +               {
> +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> +                   return;
> +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> +                   return;
> +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +                 if (use_lhs != cmp_rhs1)
> +                   return;
> +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +                 if (!integer_zerop (cmp_rhs2))
> +                   return;
> +
> +                 tree and_mask;
> +
> +                 unsigned HOST_WIDE_INT bytes
> +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> +                 ibit = bytes * BITS_PER_UNIT - 1;
> +                 unsigned HOST_WIDE_INT highest
> +                   = HOST_WIDE_INT_1U << ibit;
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     /* Get the signed maximum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest - 1);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                   }
> +                 else
> +                   {
> +                     /* Get the signed minimum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                   }
> +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                          and_mask);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +                 use_stmt = g;
> +                 g = gimple_build_assign (use_nop_lhs,
> +                                          (rhs_code == GE_EXPR
> +                                           ? EQ_EXPR : NE_EXPR),
> +                                          var,
> +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_remove (&gsi, true);
> +               }
> +           }
> +         else
> +           {
> +             tree and_expr = gimple_assign_lhs (use_nop_stmt);
> +             tree match_op[3];
> +             gimple *g;
> +             if (!gimple_nop_atomic_bit_test_and_p (and_expr,
> +                                                    &match_op[0], NULL)
> +                 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
> +                 || !single_imm_use (match_op[2], &use_p, &g)
> +                 || !is_gimple_assign (g))
> +               return;
> +             mask = match_op[1];
> +             if (TREE_CODE (mask) == INTEGER_CST)
> +               {
> +                 ibit = tree_log2 (mask);
> +                 gcc_assert (ibit >= 0);
> +               }
> +             else
> +               {
> +                 g = SSA_NAME_DEF_STMT (mask);
> +                 gcc_assert (is_gimple_assign (g));
> +                 bit = gimple_assign_rhs2 (g);
> +               }
> +             /* Convert
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _2 = (int) _1;
> +                _5 = _2 & mask;
> +                to
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _6 = _1 & mask;
> +                _5 = (int) _6;
> +                and convert
> +                _1 = ~mask_7;
> +                _2 = (unsigned int) _1;
> +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> +                _4 = (int) _3;
> +                _5 = _4 & mask_7;
> +                to
> +                _3 = __atomic_fetch_and_* (ptr_6, ~mask_7, 0);
> +                _12 = _3 & mask_7;
> +                _5 = (int) _12;
> +              */
> +             replace_uses_by (use_lhs, lhs);
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> +             gimple_assign_set_lhs (use_nop_stmt, var);
> +             gsi = gsi_for_stmt (use_stmt);
> +             gsi_remove (&gsi, true);
> +             release_defs (use_stmt);
> +             gsi_remove (gsip, true);
> +             g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
> +             gsi = gsi_for_stmt (use_nop_stmt);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +             use_stmt = use_nop_stmt;
> +           }
> +       }
> +
> +      if (!bit)
> +       {
> +         if (ibit < 0)
> +           gcc_unreachable ();
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +    }
> +
>    switch (fn)
>      {
>      case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,51 +3646,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
>      return;
>
> -  mask = gimple_call_arg (call, 1);
>    tree use_lhs = gimple_assign_lhs (use_stmt);
>    if (!use_lhs)
>      return;
>
> -  if (TREE_CODE (mask) == INTEGER_CST)
> -    {
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> -      mask = fold_convert (TREE_TYPE (lhs), mask);
> -      int ibit = tree_log2 (mask);
> -      if (ibit < 0)
> -       return;
> -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> -    }
> -  else if (TREE_CODE (mask) == SSA_NAME)
> +  if (!bit)
>      {
> -      gimple *g = SSA_NAME_DEF_STMT (mask);
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +      if (TREE_CODE (mask) == INTEGER_CST)
>         {
> -         if (!is_gimple_assign (g)
> -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> +         mask = fold_convert (TREE_TYPE (lhs), mask);
> +         int ibit = tree_log2 (mask);
> +         if (ibit < 0)
>             return;
> -         mask = gimple_assign_rhs1 (g);
> -         if (TREE_CODE (mask) != SSA_NAME)
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +      else if (TREE_CODE (mask) == SSA_NAME)
> +       {
> +         gimple *g = SSA_NAME_DEF_STMT (mask);
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +               return;
> +             mask = gimple_assign_rhs1 (g);
> +             if (TREE_CODE (mask) != SSA_NAME)
> +               return;
> +             g = SSA_NAME_DEF_STMT (mask);
> +           }
> +         if (!is_gimple_assign (g))
>             return;
> -         g = SSA_NAME_DEF_STMT (mask);
> +         rhs_code = gimple_assign_rhs_code (g);
> +         if (rhs_code != LSHIFT_EXPR)
> +           {
> +             if (rhs_code != NOP_EXPR)
> +               return;
> +
> +             /* Handle
> +                _1 = 1 << bit_4(D);
> +                mask_5 = (unsigned int) _1;
> +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> +                _3 = _2 & mask_5;
> +                */
> +             tree nop_lhs = gimple_assign_lhs (g);
> +             tree nop_rhs = gimple_assign_rhs1 (g);
> +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> +               return;
> +             g = SSA_NAME_DEF_STMT (nop_rhs);
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> +               return;
> +           }
> +         if (!integer_onep (gimple_assign_rhs1 (g)))
> +           return;
> +         bit = gimple_assign_rhs2 (g);
>         }
> -      if (!is_gimple_assign (g)
> -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> -         || !integer_onep (gimple_assign_rhs1 (g)))
> +      else
>         return;
> -      bit = gimple_assign_rhs2 (g);
> -    }
> -  else
> -    return;
>
> -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> -    {
> -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> +       {
> +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +           return;
> +       }
> +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> +                                   mask, 0))
>         return;
>      }
> -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> -    return;
>
>    bool use_bool = true;
>    bool has_debug_uses = false;
> @@ -3434,18 +3804,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>          of the specified bit after the atomic operation (makes only sense
>          for xor, otherwise the bit content is compile time known),
>          we need to invert the bit.  */
> -      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> -                              BIT_XOR_EXPR, new_lhs,
> -                              use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> -                                       : mask);
> -      new_lhs = gimple_assign_lhs (g);
> +      tree mask_convert = mask;
> +      gimple_seq stmts = NULL;
> +      if (!use_bool)
> +       mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> +      new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> +                             use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> +                                      : mask_convert);
>        if (throws)
>         {
> -         gsi_insert_on_edge_immediate (e, g);
> -         gsi = gsi_for_stmt (g);
> +         gsi_insert_seq_on_edge_immediate (e, stmts);
> +         gsi = gsi_for_stmt (gimple_seq_last (stmts));
>         }
>        else
> -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
>      }
>    if (use_bool && has_debug_uses)
>      {
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-11-10  8:29 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-10 13:49 [PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns H.J. Lu
2021-10-13 12:34 ` Richard Biener
2021-10-21 11:15   ` Hongtao Liu
2021-10-26  8:16     ` Richard Biener
2021-11-04  1:27       ` [PATCH v5] " liuhongt
2021-11-09 12:48         ` Richard Biener
2021-11-10  5:20           ` [PATCH] " liuhongt
2021-11-10  8:28             ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).