public inbox for gcc-patches@gcc.gnu.org
* [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]
@ 2021-01-27  9:20 Jakub Jelinek
  2021-01-27 10:22 ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-01-27  9:20 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

Hi!

This patch adds a peephole2 for the optimization requested in the PR,
namely that we emit awful code for __atomic_sub_fetch (x, y, z) == 0
or __atomic_sub_fetch (x, y, z) != 0 when y is not constant.
This can't be done in the combiner which punts on combining UNSPEC_VOLATILE
into other insns.

For other ops we'd need different peephole2s; this one is specific in the
comparison instruction and negation that need to be matched.
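
As a rough illustration (not part of the patch; the function name below is
made up), for

  int
  dec_and_test (long *p, long y)
  {
    return __atomic_sub_fetch (p, y, __ATOMIC_RELEASE) == 0;
  }

we currently negate y, use a lock xadd and then need an extra comparison to
recover the == 0 test, while a single flag-setting lock sub would do.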

Bootstrapped/regtested on x86_64-linux and i686-linux.  Is this ok for trunk
(as an exception), or for GCC 12?

2021-01-27  Jakub Jelinek  <jakub@redhat.com>

	PR target/98737
	* config/i386/sync.md (neg; mov; lock xadd; add peephole2): New
	define_peephole2.
	(*atomic_fetch_sub_cmp<mode>): New define_insn.

	* gcc.target/i386/pr98737.c: New test.

--- gcc/config/i386/sync.md.jj	2021-01-04 10:25:45.392159555 +0100
+++ gcc/config/i386/sync.md	2021-01-26 16:03:13.911100510 +0100
@@ -777,6 +777,63 @@ (define_insn "*atomic_fetch_add_cmp<mode
   return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
+;; Similarly, peephole for __sync_sub_fetch (x, b) == 0 into just
+;; lock sub followed by testing of flags instead of lock xadd, negation and
+;; comparison.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "register_operand")
+		   (neg (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 1 "register_operand")
+	(match_operand:SWI 2 "register_operand"))
+   (parallel [(set (match_operand:SWI 3 "register_operand")
+		   (unspec_volatile:SWI
+		     [(match_operand:SWI 4 "memory_operand")
+		      (match_operand:SI 5 "const_int_operand")]
+		     UNSPECV_XCHG))
+	      (set (match_dup 4)
+		   (plus:SWI (match_dup 4)
+			     (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (neg:SWI
+				  (match_operand:SWI 6 "register_operand"))
+				(match_dup 3)))
+	      (clobber (match_dup 3))])]
+  "(GET_MODE (operands[0]) == <LEAMODE>mode
+    || GET_MODE (operands[0]) == <MODE>mode)
+   && reg_or_subregno (operands[0]) == reg_or_subregno (operands[2])
+   && (rtx_equal_p (operands[2], operands[3])
+       ? rtx_equal_p (operands[1], operands[6])
+       : (rtx_equal_p (operands[2], operands[6])
+	  && rtx_equal_p (operands[1], operands[3])))
+   && peep2_reg_dead_p (4, operands[6])
+   && peep2_reg_dead_p (4, operands[3])
+   && !reg_overlap_mentioned_p (operands[1], operands[4])
+   && !reg_overlap_mentioned_p (operands[2], operands[4])"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ
+		     (unspec_volatile:SWI [(match_dup 4) (match_dup 5)]
+					  UNSPECV_XCHG)
+		     (match_dup 2)))
+	      (set (match_dup 4)
+		   (minus:SWI (match_dup 4)
+			      (match_dup 2)))])])
+
+(define_insn "*atomic_fetch_sub_cmp<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (unspec_volatile:SWI
+	    [(match_operand:SWI 0 "memory_operand" "+m")
+	     (match_operand:SI 2 "const_int_operand")]		;; model
+	    UNSPECV_XCHG)
+	  (match_operand:SWI 1 "register_operand" "r")))
+   (set (match_dup 0)
+	(minus:SWI (match_dup 0)
+		   (match_dup 1)))]
+  ""
+  "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}")
+
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
 ;; In addition, it is always a full barrier, so we can ignore the memory model.
 (define_insn "atomic_exchange<mode>"
--- gcc/testsuite/gcc.target/i386/pr98737.c.jj	2021-01-26 15:59:24.640620178 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737.c	2021-01-26 16:00:02.898205888 +0100
@@ -0,0 +1,38 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+foo (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+bar (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+baz (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+qux (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}

	Jakub



* Re: [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]
  2021-01-27  9:20 [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737] Jakub Jelinek
@ 2021-01-27 10:22 ` Uros Bizjak
  2021-01-27 10:37   ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2021-01-27 10:22 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, Jan 27, 2021 at 10:20 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> This patch adds a peephole2 for the optimization requested in the PR,
> namely that we emit awful code for __atomic_sub_fetch (x, y, z) == 0
> or __atomic_sub_fetch (x, y, z) != 0 when y is not constant.
> This can't be done in the combiner which punts on combining UNSPEC_VOLATILE
> into other insns.
>
> For other ops we'd need different peephole2s; this one is specific in the
> comparison instruction and negation that need to be matched.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Is this ok for trunk
> (as an exception), or for GCC 12?

If there is no urgent need, I'd rather obey stage 4 and wait for GCC 12.
There is the PR98375 meta bug to track pending GCC 12 patches.

> 2021-01-27  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/98737
>         * config/i386/sync.md (neg; mov; lock xadd; add peephole2): New
>         define_peephole2.
>         (*atomic_fetch_sub_cmp<mode>): New define_insn.
>
>         * gcc.target/i386/pr98737.c: New test.

OK, although this peephole is quite complex and the matched sequence is
easily perturbed. Please note that the reg-reg move is only there because
the RA needs it to satisfy a register constraint; if the value is already
in the right register, the sequence won't match. Do we need an additional
pattern with the reg-reg move omitted?

In the PR, Ulrich suggested also handling other arith/logic operations,
but matching these would be even harder, as they are emitted using a
cmpxchg loop. Maybe the middle end could emit a special version of the
"boolean" atomic insn, if only the flags are needed?

Uros.

> --- gcc/config/i386/sync.md.jj  2021-01-04 10:25:45.392159555 +0100
> +++ gcc/config/i386/sync.md     2021-01-26 16:03:13.911100510 +0100
> @@ -777,6 +777,63 @@ (define_insn "*atomic_fetch_add_cmp<mode
>    return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
>  })
>
> +;; Similarly, peephole for __sync_sub_fetch (x, b) == 0 into just
> +;; lock sub followed by testing of flags instead of lock xadd, negation and
> +;; comparison.
> +(define_peephole2
> +  [(parallel [(set (match_operand 0 "register_operand")
> +                  (neg (match_dup 0)))
> +             (clobber (reg:CC FLAGS_REG))])
> +   (set (match_operand:SWI 1 "register_operand")
> +       (match_operand:SWI 2 "register_operand"))
> +   (parallel [(set (match_operand:SWI 3 "register_operand")
> +                  (unspec_volatile:SWI
> +                    [(match_operand:SWI 4 "memory_operand")
> +                     (match_operand:SI 5 "const_int_operand")]
> +                    UNSPECV_XCHG))
> +             (set (match_dup 4)
> +                  (plus:SWI (match_dup 4)
> +                            (match_dup 3)))
> +             (clobber (reg:CC FLAGS_REG))])
> +   (parallel [(set (reg:CCZ FLAGS_REG)
> +                  (compare:CCZ (neg:SWI
> +                                 (match_operand:SWI 6 "register_operand"))
> +                               (match_dup 3)))
> +             (clobber (match_dup 3))])]
> +  "(GET_MODE (operands[0]) == <LEAMODE>mode
> +    || GET_MODE (operands[0]) == <MODE>mode)
> +   && reg_or_subregno (operands[0]) == reg_or_subregno (operands[2])
> +   && (rtx_equal_p (operands[2], operands[3])
> +       ? rtx_equal_p (operands[1], operands[6])
> +       : (rtx_equal_p (operands[2], operands[6])
> +         && rtx_equal_p (operands[1], operands[3])))
> +   && peep2_reg_dead_p (4, operands[6])
> +   && peep2_reg_dead_p (4, operands[3])
> +   && !reg_overlap_mentioned_p (operands[1], operands[4])
> +   && !reg_overlap_mentioned_p (operands[2], operands[4])"
> +  [(parallel [(set (reg:CCZ FLAGS_REG)
> +                  (compare:CCZ
> +                    (unspec_volatile:SWI [(match_dup 4) (match_dup 5)]
> +                                         UNSPECV_XCHG)
> +                    (match_dup 2)))
> +             (set (match_dup 4)
> +                  (minus:SWI (match_dup 4)
> +                             (match_dup 2)))])])
> +
> +(define_insn "*atomic_fetch_sub_cmp<mode>"
> +  [(set (reg:CCZ FLAGS_REG)
> +       (compare:CCZ
> +         (unspec_volatile:SWI
> +           [(match_operand:SWI 0 "memory_operand" "+m")
> +            (match_operand:SI 2 "const_int_operand")]          ;; model
> +           UNSPECV_XCHG)
> +         (match_operand:SWI 1 "register_operand" "r")))
> +   (set (match_dup 0)
> +       (minus:SWI (match_dup 0)
> +                  (match_dup 1)))]
> +  ""
> +  "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}")
> +
>  ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
>  ;; In addition, it is always a full barrier, so we can ignore the memory model.
>  (define_insn "atomic_exchange<mode>"
> --- gcc/testsuite/gcc.target/i386/pr98737.c.jj  2021-01-26 15:59:24.640620178 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737.c     2021-01-26 16:00:02.898205888 +0100
> @@ -0,0 +1,38 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +foo (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +bar (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +baz (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +qux (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
>
>         Jakub
>


* Re: [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]
  2021-01-27 10:22 ` Uros Bizjak
@ 2021-01-27 10:37   ` Jakub Jelinek
  2021-01-27 11:27     ` Ulrich Drepper
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-01-27 10:37 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Ulrich Drepper

On Wed, Jan 27, 2021 at 11:22:57AM +0100, Uros Bizjak wrote:
> > Bootstrapped/regtested on x86_64-linux and i686-linux.  Is this ok for trunk
> > (as an exception), or for GCC 12?
> 
> If there is no urgent need, I'd rather obey stage 4 and wait for GCC 12.
> There is the PR98375 meta bug to track pending GCC 12 patches.

Okay.

> > 2021-01-27  Jakub Jelinek  <jakub@redhat.com>
> >
> >         PR target/98737
> >         * config/i386/sync.md (neg; mov; lock xadd; add peephole2): New
> >         define_peephole2.
> >         (*atomic_fetch_sub_cmp<mode>): New define_insn.
> >
> >         * gcc.target/i386/pr98737.c: New test.
> 
> OK, although this peephole is quite complex and the matched sequence is
> easily perturbed. Please note that the reg-reg move is only there because
> the RA needs it to satisfy a register constraint; if the value is already
> in the right register, the sequence won't match. Do we need an additional
> pattern with the reg-reg move omitted?

If there is no reg-reg move, then it is impossible to prove that it is a
negation.  The use of lock xadd forces addition instead of subtraction,
and additionally clobbers its register operand, so for the comparison one
needs another register that still holds the value the xadd register held
initially.  And we need to prove that it is a negation.
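
Very roughly, a C-level sketch of what the matched sequence does (names and
the memory model are illustrative, the real thing is RTL):

  static int
  sub_and_test (long *p, long y)
  {
    long neg = -y;                      /* neg           */
    long tmp = neg;                     /* reg-reg move  */
    long old = __atomic_fetch_add (p, tmp, __ATOMIC_RELEASE);
                                        /* lock xadd; at the RTL level it
                                           clobbers tmp's register with the
                                           old memory value  */
    return -neg == old;                 /* cmp: old - y == 0  */
  }

Without the copy, the only register known to hold -y would be clobbered by
the xadd and there would be nothing left to compare against.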

> In the PR, Ulrich suggested also handling other arith/logic operations,
> but matching these would be even harder, as they are emitted using a
> cmpxchg loop. Maybe the middle end could emit a special version of the
> "boolean" atomic insn, if only the flags are needed?

I guess we could add new optabs for the atomic builtins whose *_fetch
(rather than fetch_*) result is ==/!= compared against 0; I'm not sure we
could do anything else easily, because what exact kind of comparison it is
then is heavily machine dependent and the backend would then need to emit
everything including branches (like e.g. the addv<mode>4 etc. expanders).
Would an equality comparison against 0 handle the most common cases?

The user can write it as
__atomic_sub_fetch (x, y, z) == 0
or
__atomic_fetch_sub (x, y, z) - y == 0
though, so the expansion code would need to be able to cope with both.
And the latter form is where all kinds of interfering optimizations pop up,
e.g. for the subtraction it will actually be optimized into
__atomic_fetch_sub (x, y, z) == y.
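
For instance (illustrative only), all of these spellings compute the same
thing:

  r1 = __atomic_sub_fetch (x, y, z) == 0;
  r2 = __atomic_fetch_sub (x, y, z) - y == 0;	/* gets folded into ...  */
  r3 = __atomic_fetch_sub (x, y, z) == y;	/* ... this form  */

so whatever recognizes the first form would miss the others unless they are
canonicalized to it.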

	Jakub



* Re: [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]
  2021-01-27 10:37   ` Jakub Jelinek
@ 2021-01-27 11:27     ` Ulrich Drepper
  2021-12-15  9:22       ` [PATCH] i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} " Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Ulrich Drepper @ 2021-01-27 11:27 UTC (permalink / raw)
  To: Jakub Jelinek, Uros Bizjak; +Cc: gcc-patches


On 1/27/21 11:37 AM, Jakub Jelinek wrote:
> Would an equality comparison against 0 handle the most common cases?
> 
> The user can write it as
> __atomic_sub_fetch (x, y, z) == 0
> or
> __atomic_fetch_sub (x, y, z) - y == 0
> though, so the expansion code would need to be able to cope with both.

Please also keep !=0, <0, <=0, >0, and >=0 in mind.  They all can be
useful and can be handled with the flags.




* [PATCH] i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} 0 [PR98737]
  2021-01-27 11:27     ` Ulrich Drepper
@ 2021-12-15  9:22       ` Jakub Jelinek
  2021-12-15 10:54         ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-12-15  9:22 UTC (permalink / raw)
  To: Uros Bizjak, Jeff Law; +Cc: Ulrich Drepper, gcc-patches

On Wed, Jan 27, 2021 at 12:27:13PM +0100, Ulrich Drepper via Gcc-patches wrote:
> On 1/27/21 11:37 AM, Jakub Jelinek wrote:
> > Would an equality comparison against 0 handle the most common cases?
> > 
> > The user can write it as
> > __atomic_sub_fetch (x, y, z) == 0
> > or
> > __atomic_fetch_sub (x, y, z) - y == 0
> > though, so the expansion code would need to be able to cope with both.
> 
> Please also keep !=0, <0, <=0, >0, and >=0 in mind.  They all can be
> useful and can be handled with the flags.

<= 0 and > 0 don't really work well with lock {add,sub,inc,dec}; x86 doesn't
have a comparison that looks solely at both SF and ZF and not at other
flags (and emitting two separate conditional jumps, or two setcc insns and
oring them together, looks awful).

But the rest can work.
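
For example (illustrative AT&T syntax, not something this patch emits),
new <= 0 after a flag-setting lock sub would need something like

	lock subl %esi, (%rdi)	# ZF/SF reflect the new value
	setz	%al		# new == 0
	sets	%cl		# new < 0
	orb	%cl, %al	# new <= 0 needs ZF or SF; no single setcc
				# condition tests exactly that combination

while ==, !=, < and >= each map onto a single setcc/jcc.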

Here is a patch that adds internal functions and optabs for these,
recognizes them at the same spot as e.g. the .ATOMIC_BIT_TEST_AND* internal
functions (the fold all builtins pass) and expands them appropriately (or,
for the <= 0 and > 0 cases of +/-, FAILs and lets the middle end fall back).

So far I have handled just the op_fetch builtins; IMHO, instead of also
handling __atomic_fetch_sub (x, y, z) - y == 0 etc., we should canonicalize
__atomic_fetch_sub (x, y, z) - y to __atomic_sub_fetch (x, y, z) (and vice
versa).
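
Concretely (this mirrors the comment added to tree-ssa-ccp.c below), the
fold-builtins pass rewrites e.g.

     _4 = __atomic_add_fetch_4 (ptr_6, arg_2, _3);
     _5 = _4 == 0;
   to
     _4 = .ATOMIC_ADD_FETCH_CMP_0 (EQ_EXPR, ptr_6, arg_2, _3);
     _5 = _4;

and the backend expander then emits a single flag-setting lock add followed
by a setcc.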

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-12-15  Jakub Jelinek  <jakub@redhat.com>

	PR target/98737
	* internal-fn.def (ATOMIC_ADD_FETCH_CMP_0, ATOMIC_SUB_FETCH_CMP_0,
	ATOMIC_AND_FETCH_CMP_0, ATOMIC_OR_FETCH_CMP_0, ATOMIC_XOR_FETCH_CMP_0):
	New internal fns.
	* internal-fn.h (ATOMIC_OP_FETCH_CMP_0_EQ, ATOMIC_OP_FETCH_CMP_0_NE,
	ATOMIC_OP_FETCH_CMP_0_LT, ATOMIC_OP_FETCH_CMP_0_LE,
	ATOMIC_OP_FETCH_CMP_0_GT, ATOMIC_OP_FETCH_CMP_0_GE): New enumerators.
	* internal-fn.c (expand_ATOMIC_ADD_FETCH_CMP_0,
	expand_ATOMIC_SUB_FETCH_CMP_0, expand_ATOMIC_AND_FETCH_CMP_0,
	expand_ATOMIC_OR_FETCH_CMP_0, expand_ATOMIC_XOR_FETCH_CMP_0): New
	functions.
	* optabs.def (atomic_add_fetch_cmp_0_optab,
	atomic_sub_fetch_cmp_0_optab, atomic_and_fetch_cmp_0_optab,
	atomic_or_fetch_cmp_0_optab, atomic_xor_fetch_cmp_0_optab): New
	direct optabs.
	* builtins.h (expand_ifn_atomic_op_fetch_cmp_0): Declare.
	* builtins.c (expand_ifn_atomic_op_fetch_cmp_0): New function.
	* tree-ssa-ccp.c: Include internal-fn.h.
	(optimize_atomic_bit_test_and): Add . before internal fn call
	in function comment.  Change return type from void to bool and
	return true only if successfully replaced.
	(optimize_atomic_op_fetch_cmp_0): New function.
	(pass_fold_builtins::execute): Use optimize_atomic_op_fetch_cmp_0
	for BUILT_IN_ATOMIC_{ADD,SUB,AND,OR,XOR}_FETCH_{1,2,4,8,16} and
	BUILT_IN_SYNC_{ADD,SUB,AND,OR,XOR}_AND_FETCH_{1,2,4,8,16},
	for *XOR* ones only if optimize_atomic_bit_test_and failed.
	* config/i386/sync.md (atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>,
	atomic_<logic>_fetch_cmp_0<mode>): New define_expand patterns.
	(atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1,
	atomic_<logic>_fetch_cmp_0<mode>_1): New define_insn patterns.

	* gcc.target/i386/pr98737-1.c: New test.
	* gcc.target/i386/pr98737-2.c: New test.
	* gcc.target/i386/pr98737-3.c: New test.
	* gcc.target/i386/pr98737-4.c: New test.
	* gcc.target/i386/pr98737-5.c: New test.
	* gcc.target/i386/pr98737-6.c: New test.
	* gcc.target/i386/pr98737-7.c: New test.

--- gcc/internal-fn.def.jj	2021-11-30 13:26:09.323329485 +0100
+++ gcc/internal-fn.def	2021-12-13 12:12:10.947053554 +0100
@@ -403,6 +403,11 @@ DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF, NULL)
 DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF, NULL)
+DEF_INTERNAL_FN (ATOMIC_ADD_FETCH_CMP_0, ECF_LEAF, NULL)
+DEF_INTERNAL_FN (ATOMIC_SUB_FETCH_CMP_0, ECF_LEAF, NULL)
+DEF_INTERNAL_FN (ATOMIC_AND_FETCH_CMP_0, ECF_LEAF, NULL)
+DEF_INTERNAL_FN (ATOMIC_OR_FETCH_CMP_0, ECF_LEAF, NULL)
+DEF_INTERNAL_FN (ATOMIC_XOR_FETCH_CMP_0, ECF_LEAF, NULL)
 
 /* To implement [[fallthrough]].  */
 DEF_INTERNAL_FN (FALLTHROUGH, ECF_LEAF | ECF_NOTHROW, NULL)
--- gcc/internal-fn.h.jj	2021-11-30 13:26:09.324329471 +0100
+++ gcc/internal-fn.h	2021-12-13 19:17:03.491728748 +0100
@@ -240,4 +240,13 @@ extern void expand_SHUFFLEVECTOR (intern
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
 
+enum {
+  ATOMIC_OP_FETCH_CMP_0_EQ = 0,
+  ATOMIC_OP_FETCH_CMP_0_NE = 1,
+  ATOMIC_OP_FETCH_CMP_0_LT = 2,
+  ATOMIC_OP_FETCH_CMP_0_LE = 3,
+  ATOMIC_OP_FETCH_CMP_0_GT = 4,
+  ATOMIC_OP_FETCH_CMP_0_GE = 5
+};
+
 #endif
--- gcc/internal-fn.c.jj	2021-12-02 19:41:52.635552695 +0100
+++ gcc/internal-fn.c	2021-12-13 12:19:51.504465053 +0100
@@ -3238,6 +3238,46 @@ expand_ATOMIC_COMPARE_EXCHANGE (internal
   expand_ifn_atomic_compare_exchange (call);
 }
 
+/* Expand atomic add fetch and cmp with 0.  */
+
+static void
+expand_ATOMIC_ADD_FETCH_CMP_0 (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_op_fetch_cmp_0 (call);
+}
+
+/* Expand atomic sub fetch and cmp with 0.  */
+
+static void
+expand_ATOMIC_SUB_FETCH_CMP_0 (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_op_fetch_cmp_0 (call);
+}
+
+/* Expand atomic and fetch and cmp with 0.  */
+
+static void
+expand_ATOMIC_AND_FETCH_CMP_0 (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_op_fetch_cmp_0 (call);
+}
+
+/* Expand atomic or fetch and cmp with 0.  */
+
+static void
+expand_ATOMIC_OR_FETCH_CMP_0 (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_op_fetch_cmp_0 (call);
+}
+
+/* Expand atomic xor fetch and cmp with 0.  */
+
+static void
+expand_ATOMIC_XOR_FETCH_CMP_0 (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_op_fetch_cmp_0 (call);
+}
+
 /* Expand LAUNDER to assignment, lhs = arg0.  */
 
 static void
--- gcc/optabs.def.jj	2021-11-30 13:26:09.357328990 +0100
+++ gcc/optabs.def	2021-12-13 14:52:40.180933731 +0100
@@ -451,6 +451,11 @@ OPTAB_D (atomic_sub_fetch_optab, "atomic
 OPTAB_D (atomic_sub_optab, "atomic_sub$I$a")
 OPTAB_D (atomic_xor_fetch_optab, "atomic_xor_fetch$I$a")
 OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
+OPTAB_D (atomic_add_fetch_cmp_0_optab, "atomic_add_fetch_cmp_0$I$a")
+OPTAB_D (atomic_sub_fetch_cmp_0_optab, "atomic_sub_fetch_cmp_0$I$a")
+OPTAB_D (atomic_and_fetch_cmp_0_optab, "atomic_and_fetch_cmp_0$I$a")
+OPTAB_D (atomic_or_fetch_cmp_0_optab, "atomic_or_fetch_cmp_0$I$a")
+OPTAB_D (atomic_xor_fetch_cmp_0_optab, "atomic_xor_fetch_cmp_0$I$a")
 
 OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
 OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
--- gcc/builtins.h.jj	2021-11-30 13:26:09.254330489 +0100
+++ gcc/builtins.h	2021-12-13 15:00:29.585187247 +0100
@@ -123,6 +123,7 @@ extern void std_expand_builtin_va_start
 extern void expand_builtin_trap (void);
 extern void expand_ifn_atomic_bit_test_and (gcall *);
 extern void expand_ifn_atomic_compare_exchange (gcall *);
+extern void expand_ifn_atomic_op_fetch_cmp_0 (gcall *);
 extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
 extern enum built_in_function builtin_mathfn_code (const_tree);
 extern tree fold_builtin_expect (location_t, tree, tree, tree, tree);
--- gcc/builtins.c.jj	2021-11-30 13:26:09.254330489 +0100
+++ gcc/builtins.c	2021-12-14 10:21:28.524814726 +0100
@@ -6275,6 +6275,93 @@ expand_ifn_atomic_bit_test_and (gcall *c
     emit_move_insn (target, result);
 }
 
+/* Expand IFN_ATOMIC_*_FETCH_CMP_0 internal function.  */
+
+void
+expand_ifn_atomic_op_fetch_cmp_0 (gcall *call)
+{
+  tree cmp = gimple_call_arg (call, 0);
+  tree ptr = gimple_call_arg (call, 1);
+  tree arg = gimple_call_arg (call, 2);
+  tree lhs = gimple_call_lhs (call);
+  enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
+  machine_mode mode = TYPE_MODE (TREE_TYPE (cmp));
+  optab optab;
+  rtx_code code;
+  class expand_operand ops[5];
+
+  gcc_assert (flag_inline_atomics);
+
+  if (gimple_call_num_args (call) == 4)
+    model = get_memmodel (gimple_call_arg (call, 3));
+
+  rtx mem = get_builtin_sync_mem (ptr, mode);
+  rtx op = expand_expr_force_mode (arg, mode);
+
+  switch (gimple_call_internal_fn (call))
+    {
+    case IFN_ATOMIC_ADD_FETCH_CMP_0:
+      code = PLUS;
+      optab = atomic_add_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_SUB_FETCH_CMP_0:
+      code = MINUS;
+      optab = atomic_sub_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_AND_FETCH_CMP_0:
+      code = AND;
+      optab = atomic_and_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_OR_FETCH_CMP_0:
+      code = IOR;
+      optab = atomic_or_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_XOR_FETCH_CMP_0:
+      code = XOR;
+      optab = atomic_xor_fetch_cmp_0_optab;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  enum rtx_code comp = UNKNOWN;
+  switch (tree_to_uhwi (cmp))
+    {
+    case ATOMIC_OP_FETCH_CMP_0_EQ: comp = EQ; break;
+    case ATOMIC_OP_FETCH_CMP_0_NE: comp = NE; break;
+    case ATOMIC_OP_FETCH_CMP_0_GT: comp = GT; break;
+    case ATOMIC_OP_FETCH_CMP_0_GE: comp = GE; break;
+    case ATOMIC_OP_FETCH_CMP_0_LT: comp = LT; break;
+    case ATOMIC_OP_FETCH_CMP_0_LE: comp = LE; break;
+    default: gcc_unreachable ();
+    }
+
+  rtx target;
+  if (lhs == NULL_TREE)
+    target = gen_reg_rtx (TYPE_MODE (boolean_type_node));
+  else
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  enum insn_code icode = direct_optab_handler (optab, mode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  create_output_operand (&ops[0], target, TYPE_MODE (boolean_type_node));
+  create_fixed_operand (&ops[1], mem);
+  create_convert_operand_to (&ops[2], op, mode, true);
+  create_integer_operand (&ops[3], model);
+  create_integer_operand (&ops[4], comp);
+  if (maybe_expand_insn (icode, 5, ops))
+    return;
+
+  rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op,
+				       code, model, true);
+  if (lhs)
+    {
+      result = emit_store_flag_force (target, comp, result, const0_rtx, mode,
+				      0, 1);
+      if (result != target)
+	emit_move_insn (target, result);
+    }
+}
+
 /* Expand an atomic clear operation.
 	void _atomic_clear (BOOL *obj, enum memmodel)
    EXP is the call expression.  */
--- gcc/tree-ssa-ccp.c.jj	2021-11-24 09:54:11.572737923 +0100
+++ gcc/tree-ssa-ccp.c	2021-12-14 10:24:00.394632973 +0100
@@ -151,6 +151,7 @@ along with GCC; see the file COPYING3.
 #include "symbol-summary.h"
 #include "ipa-utils.h"
 #include "ipa-prop.h"
+#include "internal-fn.h"
 
 /* Possible lattice values.  */
 typedef enum
@@ -3333,7 +3334,7 @@ extern bool gimple_nop_convert (tree, tr
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
      _5 = _4 & mask_2;
    to
-     _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
+     _4 = .ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
      _5 = _4;
    If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
    is passed instead of 0, and the builtin just returns a zero
@@ -3345,7 +3346,7 @@ extern bool gimple_nop_convert (tree, tr
    the second argument to the builtin needs to be one's complement
    of the mask instead of mask.  */
 
-static void
+static bool
 optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 			      enum internal_fn fn, bool has_model_arg,
 			      bool after)
@@ -3365,7 +3366,7 @@ optimize_atomic_bit_test_and (gimple_stm
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
       || !gimple_vdef (call))
-    return;
+    return false;
 
   switch (fn)
     {
@@ -3379,7 +3380,7 @@ optimize_atomic_bit_test_and (gimple_stm
       optab = atomic_bit_test_and_reset_optab;
       break;
     default:
-      return;
+      return false;
     }
 
   tree bit = nullptr;
@@ -3389,20 +3390,20 @@ optimize_atomic_bit_test_and (gimple_stm
   if (rhs_code != BIT_AND_EXPR)
     {
       if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
-	return;
+	return false;
 
       tree use_lhs = gimple_assign_lhs (use_stmt);
       if (TREE_CODE (use_lhs) == SSA_NAME
 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
-	return;
+	return false;
 
       tree use_rhs = gimple_assign_rhs1 (use_stmt);
       if (lhs != use_rhs)
-	return;
+	return false;
 
       if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
 	  == CODE_FOR_nothing)
-	return;
+	return false;
 
       gimple *g;
       gimple_stmt_iterator gsi;
@@ -3413,7 +3414,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	{
 	  g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
 	  if (!g)
-	    return;
+	    return false;
 	  use_stmt = g;
 	  ibit = 0;
 	}
@@ -3426,7 +3427,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
 						   ~HOST_WIDE_INT_1),
 				    mask, 0))
-		return;
+		return false;
 
 	      /* Convert
 		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
@@ -3442,7 +3443,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	    {
 	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
 	      if (!operand_equal_p (and_mask, mask, 0))
-		return;
+		return false;
 
 	      /* Convert
 		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
@@ -3468,20 +3469,20 @@ optimize_atomic_bit_test_and (gimple_stm
 	  gimple *use_nop_stmt;
 	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
 	      || !is_gimple_assign (use_nop_stmt))
-	    return;
+	    return false;
 	  tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
 	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
 	  if (rhs_code != BIT_AND_EXPR)
 	    {
 	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
 		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
-		return;
+		return false;
 	      if (rhs_code == BIT_NOT_EXPR)
 		{
 		  g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
 					      mask);
 		  if (!g)
-		    return;
+		    return false;
 		  /* Convert
 		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
 		     _2 = (int) _1;
@@ -3509,15 +3510,15 @@ optimize_atomic_bit_test_and (gimple_stm
 	      else
 		{
 		  if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
-		    return;
+		    return false;
 		  if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
-		    return;
+		    return false;
 		  tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
 		  if (use_lhs != cmp_rhs1)
-		    return;
+		    return false;
 		  tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
 		  if (!integer_zerop (cmp_rhs2))
-		    return;
+		    return false;
 
 		  tree and_mask;
 
@@ -3533,7 +3534,7 @@ optimize_atomic_bit_test_and (gimple_stm
 		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
 						highest - 1);
 		      if (!operand_equal_p (and_mask, mask, 0))
-			return;
+			return false;
 
 		      /* Convert
 			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
@@ -3553,7 +3554,7 @@ optimize_atomic_bit_test_and (gimple_stm
 		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
 						highest);
 		      if (!operand_equal_p (and_mask, mask, 0))
-			return;
+			return false;
 
 		      /* Convert
 			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
@@ -3592,7 +3593,7 @@ optimize_atomic_bit_test_and (gimple_stm
 		  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
 		  || !single_imm_use (match_op[2], &use_p, &g)
 		  || !is_gimple_assign (g))
-		return;
+		return false;
 	      mask = match_op[0];
 	      if (TREE_CODE (match_op[1]) == INTEGER_CST)
 		{
@@ -3650,7 +3651,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	    }
 	}
       else
-	return;
+	return false;
 
       if (!bit)
 	{
@@ -3661,11 +3662,11 @@ optimize_atomic_bit_test_and (gimple_stm
     }
   else if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
 	   == CODE_FOR_nothing)
-    return;
+    return false;
 
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
-    return;
+    return false;
 
   if (!bit)
     {
@@ -3676,7 +3677,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	  mask = fold_convert (TREE_TYPE (lhs), mask);
 	  int ibit = tree_log2 (mask);
 	  if (ibit < 0)
-	    return;
+	    return false;
 	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
 	}
       else if (TREE_CODE (mask) == SSA_NAME)
@@ -3687,30 +3688,30 @@ optimize_atomic_bit_test_and (gimple_stm
 	    {
 	      mask = match_op;
 	      if (TREE_CODE (mask) != SSA_NAME)
-		return;
+		return false;
 	      g = SSA_NAME_DEF_STMT (mask);
 	    }
 	  if (!is_gimple_assign (g))
-	    return;
+	    return false;
 
 	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
 	    {
 	      if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
-		return;
+		return false;
 	      mask = gimple_assign_rhs1 (g);
 	      if (TREE_CODE (mask) != SSA_NAME)
-		return;
+		return false;
 	      g = SSA_NAME_DEF_STMT (mask);
 	    }
 
 	  rhs_code = gimple_assign_rhs_code (g);
 	  if (rhs_code != LSHIFT_EXPR
 	      || !integer_onep (gimple_assign_rhs1 (g)))
-	    return;
+	    return false;
 	  bit = gimple_assign_rhs2 (g);
 	}
       else
-	return;
+	return false;
 
       tree cmp_mask;
       if (gimple_assign_rhs1 (use_stmt) == lhs)
@@ -3723,7 +3724,7 @@ optimize_atomic_bit_test_and (gimple_stm
 	cmp_mask = match_op;
 
       if (!operand_equal_p (cmp_mask, mask, 0))
-	return;
+	return false;
     }
 
   bool use_bool = true;
@@ -3748,6 +3749,8 @@ optimize_atomic_bit_test_and (gimple_stm
 	  case COND_EXPR:
 	    op1 = gimple_assign_rhs1 (g);
 	    code = TREE_CODE (op1);
+	    if (TREE_CODE_CLASS (code) != tcc_comparison)
+	      break;
 	    op0 = TREE_OPERAND (op1, 0);
 	    op1 = TREE_OPERAND (op1, 1);
 	    break;
@@ -3864,6 +3867,196 @@ optimize_atomic_bit_test_and (gimple_stm
   release_defs (use_stmt);
   gsi_remove (gsip, true);
   release_ssa_name (lhs);
+  return true;
+}
+
+/* Optimize
+     _4 = __atomic_add_fetch_* (ptr_6, arg_2, _3);
+     _5 = _4 == 0;
+   to
+     _4 = .ATOMIC_ADD_FETCH_CMP_0 (EQ_EXPR, ptr_6, arg_2, _3);
+     _5 = _4;
+   Similarly for __sync_add_and_fetch_* (without the ", _3" part
+   in there).  */
+
+static bool
+optimize_atomic_op_fetch_cmp_0 (gimple_stmt_iterator *gsip,
+				enum internal_fn fn, bool has_model_arg)
+{
+  gimple *call = gsi_stmt (*gsip);
+  tree lhs = gimple_call_lhs (call);
+  use_operand_p use_p;
+  gimple *use_stmt;
+
+  if (!flag_inline_atomics
+      || optimize_debug
+      || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
+      || !lhs
+      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
+      || !single_imm_use (lhs, &use_p, &use_stmt)
+      || !gimple_vdef (call))
+    return false;
+
+  optab optab;
+  switch (fn)
+    {
+    case IFN_ATOMIC_ADD_FETCH_CMP_0:
+      optab = atomic_add_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_SUB_FETCH_CMP_0:
+      optab = atomic_sub_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_AND_FETCH_CMP_0:
+      optab = atomic_and_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_OR_FETCH_CMP_0:
+      optab = atomic_or_fetch_cmp_0_optab;
+      break;
+    case IFN_ATOMIC_XOR_FETCH_CMP_0:
+      optab = atomic_xor_fetch_cmp_0_optab;
+      break;
+    default:
+      return false;
+    }
+
+  if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
+      == CODE_FOR_nothing)
+    return false;
+
+  tree use_lhs = lhs;
+  if (gimple_assign_cast_p (use_stmt))
+    {
+      use_lhs = gimple_assign_lhs (use_stmt);
+      if (!tree_nop_conversion_p (TREE_TYPE (use_lhs), TREE_TYPE (lhs))
+	  || (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
+	      && !POINTER_TYPE_P (TREE_TYPE (use_lhs)))
+	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs)
+	  || !single_imm_use (use_lhs, &use_p, &use_stmt))
+	return false;
+    }
+  enum tree_code code = ERROR_MARK;
+  tree op0 = NULL_TREE, op1 = NULL_TREE;
+  if (is_gimple_assign (use_stmt))
+    switch (gimple_assign_rhs_code (use_stmt))
+      {
+      case COND_EXPR:
+	op1 = gimple_assign_rhs1 (use_stmt);
+	code = TREE_CODE (op1);
+	if (TREE_CODE_CLASS (code) == tcc_comparison)
+	  {
+	    op0 = TREE_OPERAND (op1, 0);
+	    op1 = TREE_OPERAND (op1, 1);
+	  }
+	break;
+      default:
+	code = gimple_assign_rhs_code (use_stmt);
+	if (TREE_CODE_CLASS (code) == tcc_comparison)
+	  {
+	    op0 = gimple_assign_rhs1 (use_stmt);
+	    op1 = gimple_assign_rhs2 (use_stmt);
+	  }
+	break;
+      }
+  else if (gimple_code (use_stmt) == GIMPLE_COND)
+    {
+      code = gimple_cond_code (use_stmt);
+      op0 = gimple_cond_lhs (use_stmt);
+      op1 = gimple_cond_rhs (use_stmt);
+    }
+
+  switch (code)
+    {
+    case LT_EXPR:
+    case LE_EXPR:
+    case GT_EXPR:
+    case GE_EXPR:
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
+	  || TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE
+	  || TYPE_UNSIGNED (TREE_TYPE (use_lhs)))
+	return false;
+      /* FALLTHRU */
+    case EQ_EXPR:
+    case NE_EXPR:
+      if (op0 == use_lhs && integer_zerop (op1))
+	break;
+      return false;
+    default:
+      return false;
+    }
+
+  int encoded;
+  switch (code)
+    {
+    /* Use special encoding of the operation.  We want to also
+       encode the mode in the first argument and for neither EQ_EXPR
+       etc. nor EQ etc. we can rely it will fit into QImode.  */
+    case EQ_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_EQ; break;
+    case NE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_NE; break;
+    case LT_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_LT; break;
+    case LE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_LE; break;
+    case GT_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_GT; break;
+    case GE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_GE; break;
+    default: gcc_unreachable ();
+    }
+
+  tree new_lhs = make_ssa_name (boolean_type_node);
+  gimple *g;
+  tree flag = build_int_cst (TREE_TYPE (lhs), encoded);
+  if (has_model_arg)
+    g = gimple_build_call_internal (fn, 4, flag,
+				    gimple_call_arg (call, 0),
+				    gimple_call_arg (call, 1),
+				    gimple_call_arg (call, 2));
+  else
+    g = gimple_build_call_internal (fn, 3, flag,
+				    gimple_call_arg (call, 0),
+				    gimple_call_arg (call, 1));
+  gimple_call_set_lhs (g, new_lhs);
+  gimple_set_location (g, gimple_location (call));
+  gimple_move_vops (g, call);
+  bool throws = stmt_can_throw_internal (cfun, call);
+  gimple_call_set_nothrow (as_a <gcall *> (g),
+			   gimple_call_nothrow_p (as_a <gcall *> (call)));
+  gimple_stmt_iterator gsi = *gsip;
+  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+  if (throws)
+    maybe_clean_or_replace_eh_stmt (call, g);
+  if (is_gimple_assign (use_stmt))
+    switch (gimple_assign_rhs_code (use_stmt))
+      {
+      case COND_EXPR:
+	gimple_assign_set_rhs1 (use_stmt, new_lhs);
+	break;
+      default:
+	gsi = gsi_for_stmt (use_stmt);
+	if (tree ulhs = gimple_assign_lhs (use_stmt))
+	  if (useless_type_conversion_p (TREE_TYPE (ulhs),
+					 boolean_type_node))
+	    {
+	      gimple_assign_set_rhs_with_ops (&gsi, SSA_NAME, new_lhs);
+	      break;
+	    }
+	gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, new_lhs);
+	break;
+      }
+  else if (gimple_code (use_stmt) == GIMPLE_COND)
+    {
+      gcond *use_cond = as_a <gcond *> (use_stmt);
+      gimple_cond_set_code (use_cond, NE_EXPR);
+      gimple_cond_set_lhs (use_cond, new_lhs);
+      gimple_cond_set_rhs (use_cond, boolean_false_node);
+    }
+
+  update_stmt (use_stmt);
+  if (use_lhs != lhs)
+    {
+      gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (use_lhs));
+      gsi_remove (&gsi, true);
+      release_ssa_name (use_lhs);
+    }
+  gsi_remove (gsip, true);
+  release_ssa_name (lhs);
+  return true;
 }
 
 /* Optimize
@@ -4092,6 +4285,44 @@ pass_fold_builtins::execute (function *f
 		    cfg_changed = true;
 		  break;
 
+		case BUILT_IN_ATOMIC_ADD_FETCH_1:
+		case BUILT_IN_ATOMIC_ADD_FETCH_2:
+		case BUILT_IN_ATOMIC_ADD_FETCH_4:
+		case BUILT_IN_ATOMIC_ADD_FETCH_8:
+		case BUILT_IN_ATOMIC_ADD_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_ADD_FETCH_CMP_0,
+						  true);
+		  break;
+		case BUILT_IN_SYNC_ADD_AND_FETCH_1:
+		case BUILT_IN_SYNC_ADD_AND_FETCH_2:
+		case BUILT_IN_SYNC_ADD_AND_FETCH_4:
+		case BUILT_IN_SYNC_ADD_AND_FETCH_8:
+		case BUILT_IN_SYNC_ADD_AND_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_ADD_FETCH_CMP_0,
+						  false);
+		  break;
+
+		case BUILT_IN_ATOMIC_SUB_FETCH_1:
+		case BUILT_IN_ATOMIC_SUB_FETCH_2:
+		case BUILT_IN_ATOMIC_SUB_FETCH_4:
+		case BUILT_IN_ATOMIC_SUB_FETCH_8:
+		case BUILT_IN_ATOMIC_SUB_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_SUB_FETCH_CMP_0,
+						  true);
+		  break;
+		case BUILT_IN_SYNC_SUB_AND_FETCH_1:
+		case BUILT_IN_SYNC_SUB_AND_FETCH_2:
+		case BUILT_IN_SYNC_SUB_AND_FETCH_4:
+		case BUILT_IN_SYNC_SUB_AND_FETCH_8:
+		case BUILT_IN_SYNC_SUB_AND_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_SUB_FETCH_CMP_0,
+						  false);
+		  break;
+
 		case BUILT_IN_ATOMIC_FETCH_OR_1:
 		case BUILT_IN_ATOMIC_FETCH_OR_2:
 		case BUILT_IN_ATOMIC_FETCH_OR_4:
@@ -4133,16 +4364,24 @@ pass_fold_builtins::execute (function *f
 		case BUILT_IN_ATOMIC_XOR_FETCH_4:
 		case BUILT_IN_ATOMIC_XOR_FETCH_8:
 		case BUILT_IN_ATOMIC_XOR_FETCH_16:
-		  optimize_atomic_bit_test_and
-			(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
+		  if (optimize_atomic_bit_test_and
+			(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true))
+		    break;
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_XOR_FETCH_CMP_0,
+						  true);
 		  break;
 		case BUILT_IN_SYNC_XOR_AND_FETCH_1:
 		case BUILT_IN_SYNC_XOR_AND_FETCH_2:
 		case BUILT_IN_SYNC_XOR_AND_FETCH_4:
 		case BUILT_IN_SYNC_XOR_AND_FETCH_8:
 		case BUILT_IN_SYNC_XOR_AND_FETCH_16:
-		  optimize_atomic_bit_test_and
-			(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
+		  if (optimize_atomic_bit_test_and
+			(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true))
+		    break;
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_XOR_FETCH_CMP_0,
+						  false);
 		  break;
 
 		case BUILT_IN_ATOMIC_FETCH_AND_1:
@@ -4164,6 +4403,44 @@ pass_fold_builtins::execute (function *f
 						false, false);
 		  break;
 
+		case BUILT_IN_ATOMIC_AND_FETCH_1:
+		case BUILT_IN_ATOMIC_AND_FETCH_2:
+		case BUILT_IN_ATOMIC_AND_FETCH_4:
+		case BUILT_IN_ATOMIC_AND_FETCH_8:
+		case BUILT_IN_ATOMIC_AND_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_AND_FETCH_CMP_0,
+						  true);
+		  break;
+		case BUILT_IN_SYNC_AND_AND_FETCH_1:
+		case BUILT_IN_SYNC_AND_AND_FETCH_2:
+		case BUILT_IN_SYNC_AND_AND_FETCH_4:
+		case BUILT_IN_SYNC_AND_AND_FETCH_8:
+		case BUILT_IN_SYNC_AND_AND_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_AND_FETCH_CMP_0,
+						  false);
+		  break;
+
+		case BUILT_IN_ATOMIC_OR_FETCH_1:
+		case BUILT_IN_ATOMIC_OR_FETCH_2:
+		case BUILT_IN_ATOMIC_OR_FETCH_4:
+		case BUILT_IN_ATOMIC_OR_FETCH_8:
+		case BUILT_IN_ATOMIC_OR_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_OR_FETCH_CMP_0,
+						  true);
+		  break;
+		case BUILT_IN_SYNC_OR_AND_FETCH_1:
+		case BUILT_IN_SYNC_OR_AND_FETCH_2:
+		case BUILT_IN_SYNC_OR_AND_FETCH_4:
+		case BUILT_IN_SYNC_OR_AND_FETCH_8:
+		case BUILT_IN_SYNC_OR_AND_FETCH_16:
+		  optimize_atomic_op_fetch_cmp_0 (&i,
+						  IFN_ATOMIC_OR_FETCH_CMP_0,
+						  false);
+		  break;
+
 		case BUILT_IN_MEMCPY:
 		  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
 		      && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
--- gcc/config/i386/sync.md.jj	2021-11-15 13:19:07.347900863 +0100
+++ gcc/config/i386/sync.md	2021-12-13 19:06:24.123913074 +0100
@@ -938,3 +938,84 @@ (define_insn "atomic_bit_test_and_reset<
 	(const_int 0))]
   ""
   "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_expand "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (plusminus:SWI (match_operand:SWI 1 "memory_operand")
+		  (match_operand:SWI 2 "nonmemory_operand"))
+   (match_operand:SI 3 "const_int_operand") ;; model
+   (match_operand:SI 4 "const_int_operand")]
+  ""
+{
+  if (INTVAL (operands[4]) == GT || INTVAL (operands[4]) == LE)
+    FAIL;
+  emit_insn (gen_atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1 (operands[1],
+								  operands[2],
+								  operands[3]));
+  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
+		     gen_rtx_REG (CCGOCmode, FLAGS_REG), const0_rtx);
+  DONE;
+})
+
+(define_insn "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1"
+  [(set (reg:CCGOC FLAGS_REG)
+	(compare:CCGOC
+	  (plusminus:SWI
+	    (unspec_volatile:SWI
+	      [(match_operand:SWI 0 "memory_operand" "+m")
+	       (match_operand:SI 2 "const_int_operand")]		;; model
+	      UNSPECV_XCHG)
+	    (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	  (const_int 0)))
+   (set (match_dup 0)
+	(plusminus:SWI (match_dup 0) (match_dup 1)))]
+  ""
+{
+  if (incdec_operand (operands[1], <MODE>mode))
+    {
+      if ((operands[1] == const1_rtx) ^ (<CODE> != PLUS))
+	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
+      else
+	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
+    }
+
+  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+    {
+      if (<CODE> == PLUS)
+	return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+      else
+	return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
+    }
+
+  return "lock{%;} %K2<plusminus_mnemonic>{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_expand "atomic_<logic>_fetch_cmp_0<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (any_logic:SWI (match_operand:SWI 1 "memory_operand")
+		  (match_operand:SWI 2 "nonmemory_operand"))
+   (match_operand:SI 3 "const_int_operand") ;; model
+   (match_operand:SI 4 "const_int_operand")]
+  ""
+{
+  emit_insn (gen_atomic_<logic>_fetch_cmp_0<mode>_1 (operands[1], operands[2],
+						     operands[3]));
+  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
+		     gen_rtx_REG (CCNOmode, FLAGS_REG), const0_rtx);
+  DONE;
+})
+
+(define_insn "atomic_<logic>_fetch_cmp_0<mode>_1"
+  [(set (reg:CCNO FLAGS_REG)
+	(compare:CCNO
+	  (any_logic:SWI
+	    (unspec_volatile:SWI
+	      [(match_operand:SWI 0 "memory_operand" "+m")
+	       (match_operand:SI 2 "const_int_operand")]		;; model
+	      UNSPECV_XCHG)
+	    (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	  (const_int 0)))
+   (set (match_dup 0)
+	(any_logic:SWI (match_dup 0) (match_dup 1)))]
+  ""
+  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
--- gcc/testsuite/gcc.target/i386/pr98737-1.c.jj	2021-12-14 10:32:11.150582805 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-1.c	2021-12-14 10:32:05.211668125 +0100
@@ -0,0 +1,207 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f9 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f10 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f11 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f12 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f13 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f14 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f15 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f16 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f17 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) == 0;
+}
+
+int
+f18 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) == 0;
+}
+
+int
+f19 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) == 0;
+}
+
+int
+f20 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) == 0;
+}
+
+int
+f21 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) != 0;
+}
+
+int
+f22 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) != 0;
+}
+
+int
+f23 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) != 0;
+}
+
+int
+f24 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) != 0;
+}
+
+int
+f25 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) < 0;
+}
+
+int
+f26 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) < 0;
+}
+
+int
+f27 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) < 0;
+}
+
+int
+f28 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) < 0;
+}
+
+int
+f29 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) >= 0;
+}
+
+int
+f30 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) >= 0;
+}
+
+int
+f31 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) >= 0;
+}
+
+int
+f32 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) >= 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-2.c.jj	2021-12-14 10:32:26.619360582 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-2.c	2021-12-14 10:34:16.927782344 +0100
@@ -0,0 +1,111 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subq\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subl\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subw\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subb\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f9 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) <= 0;
+}
+
+int
+f10 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) <= 0;
+}
+
+int
+f11 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) <= 0;
+}
+
+int
+f12 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) <= 0;
+}
+
+int
+f13 (long x)
+{
+  return __sync_sub_and_fetch (&a, x) > 0;
+}
+
+int
+f14 (int x)
+{
+  return __sync_sub_and_fetch (&b, x) > 0;
+}
+
+int
+f15 (short x)
+{
+  return __sync_sub_and_fetch (&c, x) > 0;
+}
+
+int
+f16 (char x)
+{
+  return __sync_sub_and_fetch (&d, x) > 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-3.c.jj	2021-12-14 10:34:31.544573270 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-3.c	2021-12-14 10:34:46.086365265 +0100
@@ -0,0 +1,207 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*addq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*addl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*addw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*addb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f9 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f10 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f11 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f12 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f13 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f14 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f15 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f16 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f17 (long x)
+{
+  return __sync_add_and_fetch (&a, x) == 0;
+}
+
+int
+f18 (int x)
+{
+  return __sync_add_and_fetch (&b, x) == 0;
+}
+
+int
+f19 (short x)
+{
+  return __sync_add_and_fetch (&c, x) == 0;
+}
+
+int
+f20 (char x)
+{
+  return __sync_add_and_fetch (&d, x) == 0;
+}
+
+int
+f21 (long x)
+{
+  return __sync_add_and_fetch (&a, x) != 0;
+}
+
+int
+f22 (int x)
+{
+  return __sync_add_and_fetch (&b, x) != 0;
+}
+
+int
+f23 (short x)
+{
+  return __sync_add_and_fetch (&c, x) != 0;
+}
+
+int
+f24 (char x)
+{
+  return __sync_add_and_fetch (&d, x) != 0;
+}
+
+int
+f25 (long x)
+{
+  return __sync_add_and_fetch (&a, x) < 0;
+}
+
+int
+f26 (int x)
+{
+  return __sync_add_and_fetch (&b, x) < 0;
+}
+
+int
+f27 (short x)
+{
+  return __sync_add_and_fetch (&c, x) < 0;
+}
+
+int
+f28 (char x)
+{
+  return __sync_add_and_fetch (&d, x) < 0;
+}
+
+int
+f29 (long x)
+{
+  return __sync_add_and_fetch (&a, x) >= 0;
+}
+
+int
+f30 (int x)
+{
+  return __sync_add_and_fetch (&b, x) >= 0;
+}
+
+int
+f31 (short x)
+{
+  return __sync_add_and_fetch (&c, x) >= 0;
+}
+
+int
+f32 (char x)
+{
+  return __sync_add_and_fetch (&d, x) >= 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-4.c.jj	2021-12-14 10:34:55.005237694 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-4.c	2021-12-14 10:36:54.492528580 +0100
@@ -0,0 +1,111 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addq\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addl\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addw\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addb\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f9 (long x)
+{
+  return __sync_add_and_fetch (&a, x) <= 0;
+}
+
+int
+f10 (int x)
+{
+  return __sync_add_and_fetch (&b, x) <= 0;
+}
+
+int
+f11 (short x)
+{
+  return __sync_add_and_fetch (&c, x) <= 0;
+}
+
+int
+f12 (char x)
+{
+  return __sync_add_and_fetch (&d, x) <= 0;
+}
+
+int
+f13 (long x)
+{
+  return __sync_add_and_fetch (&a, x) > 0;
+}
+
+int
+f14 (int x)
+{
+  return __sync_add_and_fetch (&b, x) > 0;
+}
+
+int
+f15 (short x)
+{
+  return __sync_add_and_fetch (&c, x) > 0;
+}
+
+int
+f16 (char x)
+{
+  return __sync_add_and_fetch (&d, x) > 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-5.c.jj	2021-12-14 10:39:26.256357792 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-5.c	2021-12-14 10:39:22.027418280 +0100
@@ -0,0 +1,303 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*andq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*andl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*andw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*andb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f9 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f10 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f11 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f12 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f13 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f14 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f15 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f16 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f17 (long x)
+{
+  return __sync_and_and_fetch (&a, x) == 0;
+}
+
+int
+f18 (int x)
+{
+  return __sync_and_and_fetch (&b, x) == 0;
+}
+
+int
+f19 (short x)
+{
+  return __sync_and_and_fetch (&c, x) == 0;
+}
+
+int
+f20 (char x)
+{
+  return __sync_and_and_fetch (&d, x) == 0;
+}
+
+int
+f21 (long x)
+{
+  return __sync_and_and_fetch (&a, x) != 0;
+}
+
+int
+f22 (int x)
+{
+  return __sync_and_and_fetch (&b, x) != 0;
+}
+
+int
+f23 (short x)
+{
+  return __sync_and_and_fetch (&c, x) != 0;
+}
+
+int
+f24 (char x)
+{
+  return __sync_and_and_fetch (&d, x) != 0;
+}
+
+int
+f25 (long x)
+{
+  return __sync_and_and_fetch (&a, x) < 0;
+}
+
+int
+f26 (int x)
+{
+  return __sync_and_and_fetch (&b, x) < 0;
+}
+
+int
+f27 (short x)
+{
+  return __sync_and_and_fetch (&c, x) < 0;
+}
+
+int
+f28 (char x)
+{
+  return __sync_and_and_fetch (&d, x) < 0;
+}
+
+int
+f29 (long x)
+{
+  return __sync_and_and_fetch (&a, x) >= 0;
+}
+
+int
+f30 (int x)
+{
+  return __sync_and_and_fetch (&b, x) >= 0;
+}
+
+int
+f31 (short x)
+{
+  return __sync_and_and_fetch (&c, x) >= 0;
+}
+
+int
+f32 (char x)
+{
+  return __sync_and_and_fetch (&d, x) >= 0;
+}
+
+int
+f33 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f34 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f35 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f36 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f37 (long x)
+{
+  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f38 (int x)
+{
+  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f39 (short x)
+{
+  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f40 (char x)
+{
+  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f41 (long x)
+{
+  return __sync_and_and_fetch (&a, x) <= 0;
+}
+
+int
+f42 (int x)
+{
+  return __sync_and_and_fetch (&b, x) <= 0;
+}
+
+int
+f43 (short x)
+{
+  return __sync_and_and_fetch (&c, x) <= 0;
+}
+
+int
+f44 (char x)
+{
+  return __sync_and_and_fetch (&d, x) <= 0;
+}
+
+int
+f45 (long x)
+{
+  return __sync_and_and_fetch (&a, x) > 0;
+}
+
+int
+f46 (int x)
+{
+  return __sync_and_and_fetch (&b, x) > 0;
+}
+
+int
+f47 (short x)
+{
+  return __sync_and_and_fetch (&c, x) > 0;
+}
+
+int
+f48 (char x)
+{
+  return __sync_and_and_fetch (&d, x) > 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-6.c.jj	2021-12-14 10:39:40.076160115 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-6.c	2021-12-14 10:40:15.013660380 +0100
@@ -0,0 +1,303 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*orq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*orl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*orw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*orb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f9 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f10 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f11 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f12 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f13 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f14 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f15 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f16 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f17 (long x)
+{
+  return __sync_or_and_fetch (&a, x) == 0;
+}
+
+int
+f18 (int x)
+{
+  return __sync_or_and_fetch (&b, x) == 0;
+}
+
+int
+f19 (short x)
+{
+  return __sync_or_and_fetch (&c, x) == 0;
+}
+
+int
+f20 (char x)
+{
+  return __sync_or_and_fetch (&d, x) == 0;
+}
+
+int
+f21 (long x)
+{
+  return __sync_or_and_fetch (&a, x) != 0;
+}
+
+int
+f22 (int x)
+{
+  return __sync_or_and_fetch (&b, x) != 0;
+}
+
+int
+f23 (short x)
+{
+  return __sync_or_and_fetch (&c, x) != 0;
+}
+
+int
+f24 (char x)
+{
+  return __sync_or_and_fetch (&d, x) != 0;
+}
+
+int
+f25 (long x)
+{
+  return __sync_or_and_fetch (&a, x) < 0;
+}
+
+int
+f26 (int x)
+{
+  return __sync_or_and_fetch (&b, x) < 0;
+}
+
+int
+f27 (short x)
+{
+  return __sync_or_and_fetch (&c, x) < 0;
+}
+
+int
+f28 (char x)
+{
+  return __sync_or_and_fetch (&d, x) < 0;
+}
+
+int
+f29 (long x)
+{
+  return __sync_or_and_fetch (&a, x) >= 0;
+}
+
+int
+f30 (int x)
+{
+  return __sync_or_and_fetch (&b, x) >= 0;
+}
+
+int
+f31 (short x)
+{
+  return __sync_or_and_fetch (&c, x) >= 0;
+}
+
+int
+f32 (char x)
+{
+  return __sync_or_and_fetch (&d, x) >= 0;
+}
+
+int
+f33 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f34 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f35 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f36 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f37 (long x)
+{
+  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f38 (int x)
+{
+  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f39 (short x)
+{
+  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f40 (char x)
+{
+  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f41 (long x)
+{
+  return __sync_or_and_fetch (&a, x) <= 0;
+}
+
+int
+f42 (int x)
+{
+  return __sync_or_and_fetch (&b, x) <= 0;
+}
+
+int
+f43 (short x)
+{
+  return __sync_or_and_fetch (&c, x) <= 0;
+}
+
+int
+f44 (char x)
+{
+  return __sync_or_and_fetch (&d, x) <= 0;
+}
+
+int
+f45 (long x)
+{
+  return __sync_or_and_fetch (&a, x) > 0;
+}
+
+int
+f46 (int x)
+{
+  return __sync_or_and_fetch (&b, x) > 0;
+}
+
+int
+f47 (short x)
+{
+  return __sync_or_and_fetch (&c, x) > 0;
+}
+
+int
+f48 (char x)
+{
+  return __sync_or_and_fetch (&d, x) > 0;
+}
--- gcc/testsuite/gcc.target/i386/pr98737-7.c.jj	2021-12-14 10:40:23.587537740 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737-7.c	2021-12-14 10:40:59.445024845 +0100
@@ -0,0 +1,303 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xorq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xorl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xorw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*xorb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+f1 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f2 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f3 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f4 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+f5 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f6 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f7 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f8 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) != 0;
+}
+
+int
+f9 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f10 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f11 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f12 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) < 0;
+}
+
+int
+f13 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f14 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f15 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f16 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
+}
+
+int
+f17 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) == 0;
+}
+
+int
+f18 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) == 0;
+}
+
+int
+f19 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) == 0;
+}
+
+int
+f20 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) == 0;
+}
+
+int
+f21 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) != 0;
+}
+
+int
+f22 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) != 0;
+}
+
+int
+f23 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) != 0;
+}
+
+int
+f24 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) != 0;
+}
+
+int
+f25 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) < 0;
+}
+
+int
+f26 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) < 0;
+}
+
+int
+f27 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) < 0;
+}
+
+int
+f28 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) < 0;
+}
+
+int
+f29 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) >= 0;
+}
+
+int
+f30 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) >= 0;
+}
+
+int
+f31 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) >= 0;
+}
+
+int
+f32 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) >= 0;
+}
+
+int
+f33 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f34 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f35 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f36 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
+}
+
+int
+f37 (long x)
+{
+  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f38 (int x)
+{
+  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f39 (short x)
+{
+  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f40 (char x)
+{
+  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) > 0;
+}
+
+int
+f41 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) <= 0;
+}
+
+int
+f42 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) <= 0;
+}
+
+int
+f43 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) <= 0;
+}
+
+int
+f44 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) <= 0;
+}
+
+int
+f45 (long x)
+{
+  return __sync_xor_and_fetch (&a, x) > 0;
+}
+
+int
+f46 (int x)
+{
+  return __sync_xor_and_fetch (&b, x) > 0;
+}
+
+int
+f47 (short x)
+{
+  return __sync_xor_and_fetch (&c, x) > 0;
+}
+
+int
+f48 (char x)
+{
+  return __sync_xor_and_fetch (&d, x) > 0;
+}


	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} 0 [PR98737]
  2021-12-15  9:22       ` [PATCH] i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} " Jakub Jelinek
@ 2021-12-15 10:54         ` Uros Bizjak
  0 siblings, 0 replies; 6+ messages in thread
From: Uros Bizjak @ 2021-12-15 10:54 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, Ulrich Drepper, gcc-patches

On Wed, Dec 15, 2021 at 10:23 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Wed, Jan 27, 2021 at 12:27:13PM +0100, Ulrich Drepper via Gcc-patches wrote:
> > On 1/27/21 11:37 AM, Jakub Jelinek wrote:
> > > Would equality comparison against 0 handle the most common cases?
> > >
> > > The user can write it as
> > > __atomic_sub_fetch (x, y, z) == 0
> > > or
> > > __atomic_fetch_sub (x, y, z) - y == 0
> > > though, so the expansion code would need to be able to cope with both.
> >
> > Please also keep !=0, <0, <=0, >0, and >=0 in mind.  They all can be
> > useful and can be handled with the flags.
>
> <= 0 and > 0 don't really work well with lock {add,sub,inc,dec}; x86 doesn't
> have comparisons that look solely at both SF and ZF and not at the other
> flags (and emitting two separate conditional jumps, or two setcc insns and
> oring them together, looks awful).
>
> But the rest can work.
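
For illustration, a minimal user-level sketch of that distinction (lt0/le0
are made-up names; the flag comments assume x86-64):

  long v;

  int
  lt0 (long x)
  {
    /* Can become lock sub + sets: only SF needs to be inspected.  */
    return __atomic_sub_fetch (&v, x, __ATOMIC_SEQ_CST) < 0;
  }

  int
  le0 (long x)
  {
    /* Would need ZF or SF together (without involving OF); no single setcc
       covers that, so this case is left to the generic expansion.  */
    return __atomic_sub_fetch (&v, x, __ATOMIC_SEQ_CST) <= 0;
  }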
>
> Here is a patch that adds internal functions and optabs for these,
> recognizes them at the same spot as e.g. the .ATOMIC_BIT_TEST_AND* internal
> functions (the fold all builtins pass) and expands them appropriately (or,
> for the <= 0 and > 0 cases of +/-, FAILs and lets the middle-end fall back).
>
> So far I have handled just the op_fetch builtins; IMHO, instead of also
> handling __atomic_fetch_sub (x, y, z) - y == 0 etc., we should canonicalize
> __atomic_fetch_sub (x, y, z) - y to __atomic_sub_fetch (x, y, z) (and vice
> versa).
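
For reference, the two equivalent spellings in question (f/g are made-up
names); only the first form is handled by the patch as posted:

  int
  f (long *p, long y)
  {
    return __atomic_sub_fetch (p, y, __ATOMIC_SEQ_CST) == 0;
  }

  int
  g (long *p, long y)
  {
    /* Equivalent, since __atomic_fetch_sub returns the old value,
       but would rely on the proposed canonicalization.  */
    return __atomic_fetch_sub (p, y, __ATOMIC_SEQ_CST) - y == 0;
  }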
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-12-15  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/98737
>         * internal-fn.def (ATOMIC_ADD_FETCH_CMP_0, ATOMIC_SUB_FETCH_CMP_0,
>         ATOMIC_AND_FETCH_CMP_0, ATOMIC_OR_FETCH_CMP_0, ATOMIC_XOR_FETCH_CMP_0):
>         New internal fns.
>         * internal-fn.h (ATOMIC_OP_FETCH_CMP_0_EQ, ATOMIC_OP_FETCH_CMP_0_NE,
>         ATOMIC_OP_FETCH_CMP_0_LT, ATOMIC_OP_FETCH_CMP_0_LE,
>         ATOMIC_OP_FETCH_CMP_0_GT, ATOMIC_OP_FETCH_CMP_0_GE): New enumerators.
>         * internal-fn.c (expand_ATOMIC_ADD_FETCH_CMP_0,
>         expand_ATOMIC_SUB_FETCH_CMP_0, expand_ATOMIC_AND_FETCH_CMP_0,
>         expand_ATOMIC_OR_FETCH_CMP_0, expand_ATOMIC_XOR_FETCH_CMP_0): New
>         functions.
>         * optabs.def (atomic_add_fetch_cmp_0_optab,
>         atomic_sub_fetch_cmp_0_optab, atomic_and_fetch_cmp_0_optab,
>         atomic_or_fetch_cmp_0_optab, atomic_xor_fetch_cmp_0_optab): New
>         direct optabs.
>         * builtins.h (expand_ifn_atomic_op_fetch_cmp_0): Declare.
>         * builtins.c (expand_ifn_atomic_op_fetch_cmp_0): New function.
>         * tree-ssa-ccp.c: Include internal-fn.h.
>         (optimize_atomic_bit_test_and): Add . before internal fn call
>         in function comment.  Change return type from void to bool and
>         return true only if successfully replaced.
>         (optimize_atomic_op_fetch_cmp_0): New function.
>         (pass_fold_builtins::execute): Use optimize_atomic_op_fetch_cmp_0
>         for BUILT_IN_ATOMIC_{ADD,SUB,AND,OR,XOR}_FETCH_{1,2,4,8,16} and
>         BUILT_IN_SYNC_{ADD,SUB,AND,OR,XOR}_AND_FETCH_{1,2,4,8,16},
>         for *XOR* ones only if optimize_atomic_bit_test_and failed.
>         * config/i386/sync.md (atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>,
>         atomic_<logic>_fetch_cmp_0<mode>): New define_expand patterns.
>         (atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1,
>         atomic_<logic>_fetch_cmp_0<mode>_1): New define_insn patterns.
>
>         * gcc.target/i386/pr98737-1.c: New test.
>         * gcc.target/i386/pr98737-2.c: New test.
>         * gcc.target/i386/pr98737-3.c: New test.
>         * gcc.target/i386/pr98737-4.c: New test.
>         * gcc.target/i386/pr98737-5.c: New test.
>         * gcc.target/i386/pr98737-6.c: New test.
>         * gcc.target/i386/pr98737-7.c: New test.

OK (with a small adjustment) for the x86 part.

Thanks,
Uros.

>
> --- gcc/internal-fn.def.jj      2021-11-30 13:26:09.323329485 +0100
> +++ gcc/internal-fn.def 2021-12-13 12:12:10.947053554 +0100
> @@ -403,6 +403,11 @@ DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF, NULL)
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF, NULL)
>  DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_ADD_FETCH_CMP_0, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_SUB_FETCH_CMP_0, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_AND_FETCH_CMP_0, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_OR_FETCH_CMP_0, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_XOR_FETCH_CMP_0, ECF_LEAF, NULL)
>
>  /* To implement [[fallthrough]].  */
>  DEF_INTERNAL_FN (FALLTHROUGH, ECF_LEAF | ECF_NOTHROW, NULL)
> --- gcc/internal-fn.h.jj        2021-11-30 13:26:09.324329471 +0100
> +++ gcc/internal-fn.h   2021-12-13 19:17:03.491728748 +0100
> @@ -240,4 +240,13 @@ extern void expand_SHUFFLEVECTOR (intern
>
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
>
> +enum {
> +  ATOMIC_OP_FETCH_CMP_0_EQ = 0,
> +  ATOMIC_OP_FETCH_CMP_0_NE = 1,
> +  ATOMIC_OP_FETCH_CMP_0_LT = 2,
> +  ATOMIC_OP_FETCH_CMP_0_LE = 3,
> +  ATOMIC_OP_FETCH_CMP_0_GT = 4,
> +  ATOMIC_OP_FETCH_CMP_0_GE = 5
> +};
> +
>  #endif
> --- gcc/internal-fn.c.jj        2021-12-02 19:41:52.635552695 +0100
> +++ gcc/internal-fn.c   2021-12-13 12:19:51.504465053 +0100
> @@ -3238,6 +3238,46 @@ expand_ATOMIC_COMPARE_EXCHANGE (internal
>    expand_ifn_atomic_compare_exchange (call);
>  }
>
> +/* Expand atomic add fetch and cmp with 0.  */
> +
> +static void
> +expand_ATOMIC_ADD_FETCH_CMP_0 (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_op_fetch_cmp_0 (call);
> +}
> +
> +/* Expand atomic sub fetch and cmp with 0.  */
> +
> +static void
> +expand_ATOMIC_SUB_FETCH_CMP_0 (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_op_fetch_cmp_0 (call);
> +}
> +
> +/* Expand atomic and fetch and cmp with 0.  */
> +
> +static void
> +expand_ATOMIC_AND_FETCH_CMP_0 (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_op_fetch_cmp_0 (call);
> +}
> +
> +/* Expand atomic or fetch and cmp with 0.  */
> +
> +static void
> +expand_ATOMIC_OR_FETCH_CMP_0 (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_op_fetch_cmp_0 (call);
> +}
> +
> +/* Expand atomic xor fetch and cmp with 0.  */
> +
> +static void
> +expand_ATOMIC_XOR_FETCH_CMP_0 (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_op_fetch_cmp_0 (call);
> +}
> +
>  /* Expand LAUNDER to assignment, lhs = arg0.  */
>
>  static void
> --- gcc/optabs.def.jj   2021-11-30 13:26:09.357328990 +0100
> +++ gcc/optabs.def      2021-12-13 14:52:40.180933731 +0100
> @@ -451,6 +451,11 @@ OPTAB_D (atomic_sub_fetch_optab, "atomic
>  OPTAB_D (atomic_sub_optab, "atomic_sub$I$a")
>  OPTAB_D (atomic_xor_fetch_optab, "atomic_xor_fetch$I$a")
>  OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
> +OPTAB_D (atomic_add_fetch_cmp_0_optab, "atomic_add_fetch_cmp_0$I$a")
> +OPTAB_D (atomic_sub_fetch_cmp_0_optab, "atomic_sub_fetch_cmp_0$I$a")
> +OPTAB_D (atomic_and_fetch_cmp_0_optab, "atomic_and_fetch_cmp_0$I$a")
> +OPTAB_D (atomic_or_fetch_cmp_0_optab, "atomic_or_fetch_cmp_0$I$a")
> +OPTAB_D (atomic_xor_fetch_cmp_0_optab, "atomic_xor_fetch_cmp_0$I$a")
>
>  OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
>  OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
> --- gcc/builtins.h.jj   2021-11-30 13:26:09.254330489 +0100
> +++ gcc/builtins.h      2021-12-13 15:00:29.585187247 +0100
> @@ -123,6 +123,7 @@ extern void std_expand_builtin_va_start
>  extern void expand_builtin_trap (void);
>  extern void expand_ifn_atomic_bit_test_and (gcall *);
>  extern void expand_ifn_atomic_compare_exchange (gcall *);
> +extern void expand_ifn_atomic_op_fetch_cmp_0 (gcall *);
>  extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
>  extern enum built_in_function builtin_mathfn_code (const_tree);
>  extern tree fold_builtin_expect (location_t, tree, tree, tree, tree);
> --- gcc/builtins.c.jj   2021-11-30 13:26:09.254330489 +0100
> +++ gcc/builtins.c      2021-12-14 10:21:28.524814726 +0100
> @@ -6275,6 +6275,93 @@ expand_ifn_atomic_bit_test_and (gcall *c
>      emit_move_insn (target, result);
>  }
>
> +/* Expand IFN_ATOMIC_*_FETCH_CMP_0 internal function.  */
> +
> +void
> +expand_ifn_atomic_op_fetch_cmp_0 (gcall *call)
> +{
> +  tree cmp = gimple_call_arg (call, 0);
> +  tree ptr = gimple_call_arg (call, 1);
> +  tree arg = gimple_call_arg (call, 2);
> +  tree lhs = gimple_call_lhs (call);
> +  enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (cmp));
> +  optab optab;
> +  rtx_code code;
> +  class expand_operand ops[5];
> +
> +  gcc_assert (flag_inline_atomics);
> +
> +  if (gimple_call_num_args (call) == 4)
> +    model = get_memmodel (gimple_call_arg (call, 3));
> +
> +  rtx mem = get_builtin_sync_mem (ptr, mode);
> +  rtx op = expand_expr_force_mode (arg, mode);
> +
> +  switch (gimple_call_internal_fn (call))
> +    {
> +    case IFN_ATOMIC_ADD_FETCH_CMP_0:
> +      code = PLUS;
> +      optab = atomic_add_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_SUB_FETCH_CMP_0:
> +      code = MINUS;
> +      optab = atomic_sub_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_AND_FETCH_CMP_0:
> +      code = AND;
> +      optab = atomic_and_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_OR_FETCH_CMP_0:
> +      code = IOR;
> +      optab = atomic_or_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_XOR_FETCH_CMP_0:
> +      code = XOR;
> +      optab = atomic_xor_fetch_cmp_0_optab;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  enum rtx_code comp = UNKNOWN;
> +  switch (tree_to_uhwi (cmp))
> +    {
> +    case ATOMIC_OP_FETCH_CMP_0_EQ: comp = EQ; break;
> +    case ATOMIC_OP_FETCH_CMP_0_NE: comp = NE; break;
> +    case ATOMIC_OP_FETCH_CMP_0_GT: comp = GT; break;
> +    case ATOMIC_OP_FETCH_CMP_0_GE: comp = GE; break;
> +    case ATOMIC_OP_FETCH_CMP_0_LT: comp = LT; break;
> +    case ATOMIC_OP_FETCH_CMP_0_LE: comp = LE; break;
> +    default: gcc_unreachable ();
> +    }
> +
> +  rtx target;
> +  if (lhs == NULL_TREE)
> +    target = gen_reg_rtx (TYPE_MODE (boolean_type_node));
> +  else
> +    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +  enum insn_code icode = direct_optab_handler (optab, mode);
> +  gcc_assert (icode != CODE_FOR_nothing);
> +  create_output_operand (&ops[0], target, TYPE_MODE (boolean_type_node));
> +  create_fixed_operand (&ops[1], mem);
> +  create_convert_operand_to (&ops[2], op, mode, true);
> +  create_integer_operand (&ops[3], model);
> +  create_integer_operand (&ops[4], comp);
> +  if (maybe_expand_insn (icode, 5, ops))
> +    return;
> +
> +  rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op,
> +                                      code, model, true);
> +  if (lhs)
> +    {
> +      result = emit_store_flag_force (target, comp, result, const0_rtx, mode,
> +                                     0, 1);
> +      if (result != target)
> +       emit_move_insn (target, result);
> +    }
> +}
> +
>  /* Expand an atomic clear operation.
>         void _atomic_clear (BOOL *obj, enum memmodel)
>     EXP is the call expression.  */
> --- gcc/tree-ssa-ccp.c.jj       2021-11-24 09:54:11.572737923 +0100
> +++ gcc/tree-ssa-ccp.c  2021-12-14 10:24:00.394632973 +0100
> @@ -151,6 +151,7 @@ along with GCC; see the file COPYING3.
>  #include "symbol-summary.h"
>  #include "ipa-utils.h"
>  #include "ipa-prop.h"
> +#include "internal-fn.h"
>
>  /* Possible lattice values.  */
>  typedef enum
> @@ -3333,7 +3334,7 @@ extern bool gimple_nop_convert (tree, tr
>       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>       _5 = _4 & mask_2;
>     to
> -     _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> +     _4 = .ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
>       _5 = _4;
>     If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
>     is passed instead of 0, and the builtin just returns a zero
> @@ -3345,7 +3346,7 @@ extern bool gimple_nop_convert (tree, tr
>     the second argument to the builtin needs to be one's complement
>     of the mask instead of mask.  */
>
> -static void
> +static bool
>  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>                               enum internal_fn fn, bool has_model_arg,
>                               bool after)
> @@ -3365,7 +3366,7 @@ optimize_atomic_bit_test_and (gimple_stm
>        || !single_imm_use (lhs, &use_p, &use_stmt)
>        || !is_gimple_assign (use_stmt)
>        || !gimple_vdef (call))
> -    return;
> +    return false;
>
>    switch (fn)
>      {
> @@ -3379,7 +3380,7 @@ optimize_atomic_bit_test_and (gimple_stm
>        optab = atomic_bit_test_and_reset_optab;
>        break;
>      default:
> -      return;
> +      return false;
>      }
>
>    tree bit = nullptr;
> @@ -3389,20 +3390,20 @@ optimize_atomic_bit_test_and (gimple_stm
>    if (rhs_code != BIT_AND_EXPR)
>      {
>        if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> -       return;
> +       return false;
>
>        tree use_lhs = gimple_assign_lhs (use_stmt);
>        if (TREE_CODE (use_lhs) == SSA_NAME
>           && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> -       return;
> +       return false;
>
>        tree use_rhs = gimple_assign_rhs1 (use_stmt);
>        if (lhs != use_rhs)
> -       return;
> +       return false;
>
>        if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
>           == CODE_FOR_nothing)
> -       return;
> +       return false;
>
>        gimple *g;
>        gimple_stmt_iterator gsi;
> @@ -3413,7 +3414,7 @@ optimize_atomic_bit_test_and (gimple_stm
>         {
>           g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
>           if (!g)
> -           return;
> +           return false;
>           use_stmt = g;
>           ibit = 0;
>         }
> @@ -3426,7 +3427,7 @@ optimize_atomic_bit_test_and (gimple_stm
>               if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
>                                                    ~HOST_WIDE_INT_1),
>                                     mask, 0))
> -               return;
> +               return false;
>
>               /* Convert
>                  _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> @@ -3442,7 +3443,7 @@ optimize_atomic_bit_test_and (gimple_stm
>             {
>               and_mask = build_int_cst (TREE_TYPE (lhs), 1);
>               if (!operand_equal_p (and_mask, mask, 0))
> -               return;
> +               return false;
>
>               /* Convert
>                  _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> @@ -3468,20 +3469,20 @@ optimize_atomic_bit_test_and (gimple_stm
>           gimple *use_nop_stmt;
>           if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
>               || !is_gimple_assign (use_nop_stmt))
> -           return;
> +           return false;
>           tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
>           rhs_code = gimple_assign_rhs_code (use_nop_stmt);
>           if (rhs_code != BIT_AND_EXPR)
>             {
>               if (TREE_CODE (use_nop_lhs) == SSA_NAME
>                   && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> -               return;
> +               return false;
>               if (rhs_code == BIT_NOT_EXPR)
>                 {
>                   g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
>                                               mask);
>                   if (!g)
> -                   return;
> +                   return false;
>                   /* Convert
>                      _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>                      _2 = (int) _1;
> @@ -3509,15 +3510,15 @@ optimize_atomic_bit_test_and (gimple_stm
>               else
>                 {
>                   if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> -                   return;
> +                   return false;
>                   if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> -                   return;
> +                   return false;
>                   tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
>                   if (use_lhs != cmp_rhs1)
> -                   return;
> +                   return false;
>                   tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
>                   if (!integer_zerop (cmp_rhs2))
> -                   return;
> +                   return false;
>
>                   tree and_mask;
>
> @@ -3533,7 +3534,7 @@ optimize_atomic_bit_test_and (gimple_stm
>                       and_mask = build_int_cst (TREE_TYPE (use_rhs),
>                                                 highest - 1);
>                       if (!operand_equal_p (and_mask, mask, 0))
> -                       return;
> +                       return false;
>
>                       /* Convert
>                          _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> @@ -3553,7 +3554,7 @@ optimize_atomic_bit_test_and (gimple_stm
>                       and_mask = build_int_cst (TREE_TYPE (use_rhs),
>                                                 highest);
>                       if (!operand_equal_p (and_mask, mask, 0))
> -                       return;
> +                       return false;
>
>                       /* Convert
>                          _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> @@ -3592,7 +3593,7 @@ optimize_atomic_bit_test_and (gimple_stm
>                   || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
>                   || !single_imm_use (match_op[2], &use_p, &g)
>                   || !is_gimple_assign (g))
> -               return;
> +               return false;
>               mask = match_op[0];
>               if (TREE_CODE (match_op[1]) == INTEGER_CST)
>                 {
> @@ -3650,7 +3651,7 @@ optimize_atomic_bit_test_and (gimple_stm
>             }
>         }
>        else
> -       return;
> +       return false;
>
>        if (!bit)
>         {
> @@ -3661,11 +3662,11 @@ optimize_atomic_bit_test_and (gimple_stm
>      }
>    else if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
>            == CODE_FOR_nothing)
> -    return;
> +    return false;
>
>    tree use_lhs = gimple_assign_lhs (use_stmt);
>    if (!use_lhs)
> -    return;
> +    return false;
>
>    if (!bit)
>      {
> @@ -3676,7 +3677,7 @@ optimize_atomic_bit_test_and (gimple_stm
>           mask = fold_convert (TREE_TYPE (lhs), mask);
>           int ibit = tree_log2 (mask);
>           if (ibit < 0)
> -           return;
> +           return false;
>           bit = build_int_cst (TREE_TYPE (lhs), ibit);
>         }
>        else if (TREE_CODE (mask) == SSA_NAME)
> @@ -3687,30 +3688,30 @@ optimize_atomic_bit_test_and (gimple_stm
>             {
>               mask = match_op;
>               if (TREE_CODE (mask) != SSA_NAME)
> -               return;
> +               return false;
>               g = SSA_NAME_DEF_STMT (mask);
>             }
>           if (!is_gimple_assign (g))
> -           return;
> +           return false;
>
>           if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
>             {
>               if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> -               return;
> +               return false;
>               mask = gimple_assign_rhs1 (g);
>               if (TREE_CODE (mask) != SSA_NAME)
> -               return;
> +               return false;
>               g = SSA_NAME_DEF_STMT (mask);
>             }
>
>           rhs_code = gimple_assign_rhs_code (g);
>           if (rhs_code != LSHIFT_EXPR
>               || !integer_onep (gimple_assign_rhs1 (g)))
> -           return;
> +           return false;
>           bit = gimple_assign_rhs2 (g);
>         }
>        else
> -       return;
> +       return false;
>
>        tree cmp_mask;
>        if (gimple_assign_rhs1 (use_stmt) == lhs)
> @@ -3723,7 +3724,7 @@ optimize_atomic_bit_test_and (gimple_stm
>         cmp_mask = match_op;
>
>        if (!operand_equal_p (cmp_mask, mask, 0))
> -       return;
> +       return false;
>      }
>
>    bool use_bool = true;
> @@ -3748,6 +3749,8 @@ optimize_atomic_bit_test_and (gimple_stm
>           case COND_EXPR:
>             op1 = gimple_assign_rhs1 (g);
>             code = TREE_CODE (op1);
> +           if (TREE_CODE_CLASS (code) != tcc_comparison)
> +             break;
>             op0 = TREE_OPERAND (op1, 0);
>             op1 = TREE_OPERAND (op1, 1);
>             break;
> @@ -3864,6 +3867,196 @@ optimize_atomic_bit_test_and (gimple_stm
>    release_defs (use_stmt);
>    gsi_remove (gsip, true);
>    release_ssa_name (lhs);
> +  return true;
> +}
> +
> +/* Optimize
> +     _4 = __atomic_add_fetch_* (ptr_6, arg_2, _3);
> +     _5 = _4 == 0;
> +   to
> +     _4 = .ATOMIC_ADD_FETCH_CMP_0 (EQ_EXPR, ptr_6, arg_2, _3);
> +     _5 = _4;
> +   Similarly for __sync_add_and_fetch_* (without the ", _3" part
> +   in there).  */
> +
> +static bool
> +optimize_atomic_op_fetch_cmp_0 (gimple_stmt_iterator *gsip,
> +                               enum internal_fn fn, bool has_model_arg)
> +{
> +  gimple *call = gsi_stmt (*gsip);
> +  tree lhs = gimple_call_lhs (call);
> +  use_operand_p use_p;
> +  gimple *use_stmt;
> +
> +  if (!flag_inline_atomics
> +      || optimize_debug
> +      || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
> +      || !lhs
> +      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> +      || !single_imm_use (lhs, &use_p, &use_stmt)
> +      || !gimple_vdef (call))
> +    return false;
> +
> +  optab optab;
> +  switch (fn)
> +    {
> +    case IFN_ATOMIC_ADD_FETCH_CMP_0:
> +      optab = atomic_add_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_SUB_FETCH_CMP_0:
> +      optab = atomic_sub_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_AND_FETCH_CMP_0:
> +      optab = atomic_and_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_OR_FETCH_CMP_0:
> +      optab = atomic_or_fetch_cmp_0_optab;
> +      break;
> +    case IFN_ATOMIC_XOR_FETCH_CMP_0:
> +      optab = atomic_xor_fetch_cmp_0_optab;
> +      break;
> +    default:
> +      return false;
> +    }
> +
> +  if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)))
> +      == CODE_FOR_nothing)
> +    return false;
> +
> +  tree use_lhs = lhs;
> +  if (gimple_assign_cast_p (use_stmt))
> +    {
> +      use_lhs = gimple_assign_lhs (use_stmt);
> +      if (!tree_nop_conversion_p (TREE_TYPE (use_lhs), TREE_TYPE (lhs))
> +         || (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
> +             && !POINTER_TYPE_P (TREE_TYPE (use_lhs)))
> +         || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs)
> +         || !single_imm_use (use_lhs, &use_p, &use_stmt))
> +       return false;
> +    }
> +  enum tree_code code = ERROR_MARK;
> +  tree op0 = NULL_TREE, op1 = NULL_TREE;
> +  if (is_gimple_assign (use_stmt))
> +    switch (gimple_assign_rhs_code (use_stmt))
> +      {
> +      case COND_EXPR:
> +       op1 = gimple_assign_rhs1 (use_stmt);
> +       code = TREE_CODE (op1);
> +       if (TREE_CODE_CLASS (code) == tcc_comparison)
> +         {
> +           op0 = TREE_OPERAND (op1, 0);
> +           op1 = TREE_OPERAND (op1, 1);
> +         }
> +       break;
> +      default:
> +       code = gimple_assign_rhs_code (use_stmt);
> +       if (TREE_CODE_CLASS (code) == tcc_comparison)
> +         {
> +           op0 = gimple_assign_rhs1 (use_stmt);
> +           op1 = gimple_assign_rhs2 (use_stmt);
> +         }
> +       break;
> +      }
> +  else if (gimple_code (use_stmt) == GIMPLE_COND)
> +    {
> +      code = gimple_cond_code (use_stmt);
> +      op0 = gimple_cond_lhs (use_stmt);
> +      op1 = gimple_cond_rhs (use_stmt);
> +    }
> +
> +  switch (code)
> +    {
> +    case LT_EXPR:
> +    case LE_EXPR:
> +    case GT_EXPR:
> +    case GE_EXPR:
> +      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
> +         || TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE
> +         || TYPE_UNSIGNED (TREE_TYPE (use_lhs)))
> +       return false;
> +      /* FALLTHRU */
> +    case EQ_EXPR:
> +    case NE_EXPR:
> +      if (op0 == use_lhs && integer_zerop (op1))
> +       break;
> +      return false;
> +    default:
> +      return false;
> +    }
> +
> +  int encoded;
> +  switch (code)
> +    {
> +    /* Use special encoding of the operation.  We want to also
> +       encode the mode in the first argument and for neither EQ_EXPR
> +       etc. nor EQ etc. we can rely it will fit into QImode.  */
> +    case EQ_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_EQ; break;
> +    case NE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_NE; break;
> +    case LT_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_LT; break;
> +    case LE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_LE; break;
> +    case GT_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_GT; break;
> +    case GE_EXPR: encoded = ATOMIC_OP_FETCH_CMP_0_GE; break;
> +    default: gcc_unreachable ();
> +    }
> +
> +  tree new_lhs = make_ssa_name (boolean_type_node);
> +  gimple *g;
> +  tree flag = build_int_cst (TREE_TYPE (lhs), encoded);
> +  if (has_model_arg)
> +    g = gimple_build_call_internal (fn, 4, flag,
> +                                   gimple_call_arg (call, 0),
> +                                   gimple_call_arg (call, 1),
> +                                   gimple_call_arg (call, 2));
> +  else
> +    g = gimple_build_call_internal (fn, 3, flag,
> +                                   gimple_call_arg (call, 0),
> +                                   gimple_call_arg (call, 1));
> +  gimple_call_set_lhs (g, new_lhs);
> +  gimple_set_location (g, gimple_location (call));
> +  gimple_move_vops (g, call);
> +  bool throws = stmt_can_throw_internal (cfun, call);
> +  gimple_call_set_nothrow (as_a <gcall *> (g),
> +                          gimple_call_nothrow_p (as_a <gcall *> (call)));
> +  gimple_stmt_iterator gsi = *gsip;
> +  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
> +  if (throws)
> +    maybe_clean_or_replace_eh_stmt (call, g);
> +  if (is_gimple_assign (use_stmt))
> +    switch (gimple_assign_rhs_code (use_stmt))
> +      {
> +      case COND_EXPR:
> +       gimple_assign_set_rhs1 (use_stmt, new_lhs);
> +       break;
> +      default:
> +       gsi = gsi_for_stmt (use_stmt);
> +       if (tree ulhs = gimple_assign_lhs (use_stmt))
> +         if (useless_type_conversion_p (TREE_TYPE (ulhs),
> +                                        boolean_type_node))
> +           {
> +             gimple_assign_set_rhs_with_ops (&gsi, SSA_NAME, new_lhs);
> +             break;
> +           }
> +       gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, new_lhs);
> +       break;
> +      }
> +  else if (gimple_code (use_stmt) == GIMPLE_COND)
> +    {
> +      gcond *use_cond = as_a <gcond *> (use_stmt);
> +      gimple_cond_set_code (use_cond, NE_EXPR);
> +      gimple_cond_set_lhs (use_cond, new_lhs);
> +      gimple_cond_set_rhs (use_cond, boolean_false_node);
> +    }
> +
> +  update_stmt (use_stmt);
> +  if (use_lhs != lhs)
> +    {
> +      gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (use_lhs));
> +      gsi_remove (&gsi, true);
> +      release_ssa_name (use_lhs);
> +    }
> +  gsi_remove (gsip, true);
> +  release_ssa_name (lhs);
> +  return true;
>  }
>
>  /* Optimize
> @@ -4092,6 +4285,44 @@ pass_fold_builtins::execute (function *f
>                     cfg_changed = true;
>                   break;
>
> +               case BUILT_IN_ATOMIC_ADD_FETCH_1:
> +               case BUILT_IN_ATOMIC_ADD_FETCH_2:
> +               case BUILT_IN_ATOMIC_ADD_FETCH_4:
> +               case BUILT_IN_ATOMIC_ADD_FETCH_8:
> +               case BUILT_IN_ATOMIC_ADD_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_ADD_FETCH_CMP_0,
> +                                                 true);
> +                 break;
> +               case BUILT_IN_SYNC_ADD_AND_FETCH_1:
> +               case BUILT_IN_SYNC_ADD_AND_FETCH_2:
> +               case BUILT_IN_SYNC_ADD_AND_FETCH_4:
> +               case BUILT_IN_SYNC_ADD_AND_FETCH_8:
> +               case BUILT_IN_SYNC_ADD_AND_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_ADD_FETCH_CMP_0,
> +                                                 false);
> +                 break;
> +
> +               case BUILT_IN_ATOMIC_SUB_FETCH_1:
> +               case BUILT_IN_ATOMIC_SUB_FETCH_2:
> +               case BUILT_IN_ATOMIC_SUB_FETCH_4:
> +               case BUILT_IN_ATOMIC_SUB_FETCH_8:
> +               case BUILT_IN_ATOMIC_SUB_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_SUB_FETCH_CMP_0,
> +                                                 true);
> +                 break;
> +               case BUILT_IN_SYNC_SUB_AND_FETCH_1:
> +               case BUILT_IN_SYNC_SUB_AND_FETCH_2:
> +               case BUILT_IN_SYNC_SUB_AND_FETCH_4:
> +               case BUILT_IN_SYNC_SUB_AND_FETCH_8:
> +               case BUILT_IN_SYNC_SUB_AND_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_SUB_FETCH_CMP_0,
> +                                                 false);
> +                 break;
> +
>                 case BUILT_IN_ATOMIC_FETCH_OR_1:
>                 case BUILT_IN_ATOMIC_FETCH_OR_2:
>                 case BUILT_IN_ATOMIC_FETCH_OR_4:
> @@ -4133,16 +4364,24 @@ pass_fold_builtins::execute (function *f
>                 case BUILT_IN_ATOMIC_XOR_FETCH_4:
>                 case BUILT_IN_ATOMIC_XOR_FETCH_8:
>                 case BUILT_IN_ATOMIC_XOR_FETCH_16:
> -                 optimize_atomic_bit_test_and
> -                       (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
> +                 if (optimize_atomic_bit_test_and
> +                       (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true))
> +                   break;
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_XOR_FETCH_CMP_0,
> +                                                 true);
>                   break;
>                 case BUILT_IN_SYNC_XOR_AND_FETCH_1:
>                 case BUILT_IN_SYNC_XOR_AND_FETCH_2:
>                 case BUILT_IN_SYNC_XOR_AND_FETCH_4:
>                 case BUILT_IN_SYNC_XOR_AND_FETCH_8:
>                 case BUILT_IN_SYNC_XOR_AND_FETCH_16:
> -                 optimize_atomic_bit_test_and
> -                       (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
> +                 if (optimize_atomic_bit_test_and
> +                       (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true))
> +                   break;
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_XOR_FETCH_CMP_0,
> +                                                 false);
>                   break;
>
>                 case BUILT_IN_ATOMIC_FETCH_AND_1:
> @@ -4164,6 +4403,44 @@ pass_fold_builtins::execute (function *f
>                                                 false, false);
>                   break;
>
> +               case BUILT_IN_ATOMIC_AND_FETCH_1:
> +               case BUILT_IN_ATOMIC_AND_FETCH_2:
> +               case BUILT_IN_ATOMIC_AND_FETCH_4:
> +               case BUILT_IN_ATOMIC_AND_FETCH_8:
> +               case BUILT_IN_ATOMIC_AND_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_AND_FETCH_CMP_0,
> +                                                 true);
> +                 break;
> +               case BUILT_IN_SYNC_AND_AND_FETCH_1:
> +               case BUILT_IN_SYNC_AND_AND_FETCH_2:
> +               case BUILT_IN_SYNC_AND_AND_FETCH_4:
> +               case BUILT_IN_SYNC_AND_AND_FETCH_8:
> +               case BUILT_IN_SYNC_AND_AND_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_AND_FETCH_CMP_0,
> +                                                 false);
> +                 break;
> +
> +               case BUILT_IN_ATOMIC_OR_FETCH_1:
> +               case BUILT_IN_ATOMIC_OR_FETCH_2:
> +               case BUILT_IN_ATOMIC_OR_FETCH_4:
> +               case BUILT_IN_ATOMIC_OR_FETCH_8:
> +               case BUILT_IN_ATOMIC_OR_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_OR_FETCH_CMP_0,
> +                                                 true);
> +                 break;
> +               case BUILT_IN_SYNC_OR_AND_FETCH_1:
> +               case BUILT_IN_SYNC_OR_AND_FETCH_2:
> +               case BUILT_IN_SYNC_OR_AND_FETCH_4:
> +               case BUILT_IN_SYNC_OR_AND_FETCH_8:
> +               case BUILT_IN_SYNC_OR_AND_FETCH_16:
> +                 optimize_atomic_op_fetch_cmp_0 (&i,
> +                                                 IFN_ATOMIC_OR_FETCH_CMP_0,
> +                                                 false);
> +                 break;
> +
>                 case BUILT_IN_MEMCPY:
>                   if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
>                       && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
> --- gcc/config/i386/sync.md.jj  2021-11-15 13:19:07.347900863 +0100
> +++ gcc/config/i386/sync.md     2021-12-13 19:06:24.123913074 +0100
> @@ -938,3 +938,84 @@ (define_insn "atomic_bit_test_and_reset<
>         (const_int 0))]
>    ""
>    "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
> +
> +(define_expand "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>"
> +  [(match_operand:QI 0 "register_operand")
> +   (plusminus:SWI (match_operand:SWI 1 "memory_operand")
> +                 (match_operand:SWI 2 "nonmemory_operand"))
> +   (match_operand:SI 3 "const_int_operand") ;; model
> +   (match_operand:SI 4 "const_int_operand")]
> +  ""
> +{
> +  if (INTVAL (operands[4]) == GT || INTVAL (operands[4]) == LE)
> +    FAIL;
> +  emit_insn (gen_atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1 (operands[1],
> +                                                                 operands[2],
> +                                                                 operands[3]));
> +  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
> +                    gen_rtx_REG (CCGOCmode, FLAGS_REG), const0_rtx);
> +  DONE;
> +})
> +
> +(define_insn "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1"

Please split this to a separate plus and minus pattern.

> +  [(set (reg:CCGOC FLAGS_REG)
> +       (compare:CCGOC
> +         (plusminus:SWI
> +           (unspec_volatile:SWI
> +             [(match_operand:SWI 0 "memory_operand" "+m")
> +              (match_operand:SI 2 "const_int_operand")]                ;; model
> +             UNSPECV_XCHG)
> +           (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
> +         (const_int 0)))
> +   (set (match_dup 0)
> +       (plusminus:SWI (match_dup 0) (match_dup 1)))]
> +  ""
> +{
> +  if (incdec_operand (operands[1], <MODE>mode))
> +    {
> +      if ((operands[1] == const1_rtx) ^ (<CODE> != PLUS))
> +       return "lock{%;} %K2inc{<imodesuffix>}\t%0";
> +      else
> +       return "lock{%;} %K2dec{<imodesuffix>}\t%0";
> +    }
> +
> +  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
> +    {
> +      if (<CODE> == PLUS)
> +       return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
> +      else
> +       return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
> +    }
> +
> +  return "lock{%;} %K2<plusminus_mnemonic>{<imodesuffix>}\t{%1, %0|%0, %1}";
> +})
> +
> +(define_expand "atomic_<logic>_fetch_cmp_0<mode>"
> +  [(match_operand:QI 0 "register_operand")
> +   (any_logic:SWI (match_operand:SWI 1 "memory_operand")
> +                 (match_operand:SWI 2 "nonmemory_operand"))
> +   (match_operand:SI 3 "const_int_operand") ;; model
> +   (match_operand:SI 4 "const_int_operand")]
> +  ""
> +{
> +  emit_insn (gen_atomic_<logic>_fetch_cmp_0<mode>_1 (operands[1], operands[2],
> +                                                    operands[3]));
> +  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
> +                    gen_rtx_REG (CCNOmode, FLAGS_REG), const0_rtx);
> +  DONE;
> +})
> +
> +(define_insn "atomic_<logic>_fetch_cmp_0<mode>_1"
> +  [(set (reg:CCNO FLAGS_REG)
> +       (compare:CCNO
> +         (any_logic:SWI
> +           (unspec_volatile:SWI
> +             [(match_operand:SWI 0 "memory_operand" "+m")
> +              (match_operand:SI 2 "const_int_operand")]                ;; model
> +             UNSPECV_XCHG)
> +           (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
> +         (const_int 0)))
> +   (set (match_dup 0)
> +       (any_logic:SWI (match_dup 0) (match_dup 1)))]
> +  ""
> +  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
> --- gcc/testsuite/gcc.target/i386/pr98737-1.c.jj        2021-12-14 10:32:11.150582805 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-1.c   2021-12-14 10:32:05.211668125 +0100
> @@ -0,0 +1,207 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f17 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) == 0;
> +}
> +
> +int
> +f18 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) == 0;
> +}
> +
> +int
> +f19 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) == 0;
> +}
> +
> +int
> +f20 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) == 0;
> +}
> +
> +int
> +f21 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) != 0;
> +}
> +
> +int
> +f22 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) != 0;
> +}
> +
> +int
> +f23 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) != 0;
> +}
> +
> +int
> +f24 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) != 0;
> +}
> +
> +int
> +f25 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) < 0;
> +}
> +
> +int
> +f26 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) < 0;
> +}
> +
> +int
> +f27 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) < 0;
> +}
> +
> +int
> +f28 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) < 0;
> +}
> +
> +int
> +f29 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) >= 0;
> +}
> +
> +int
> +f30 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) >= 0;
> +}
> +
> +int
> +f31 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) >= 0;
> +}
> +
> +int
> +f32 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) >= 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-2.c.jj        2021-12-14 10:32:26.619360582 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-2.c   2021-12-14 10:34:16.927782344 +0100
> @@ -0,0 +1,111 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subq\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subl\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subw\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*subb\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) <= 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) <= 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) <= 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) <= 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __sync_sub_and_fetch (&a, x) > 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __sync_sub_and_fetch (&b, x) > 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __sync_sub_and_fetch (&c, x) > 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __sync_sub_and_fetch (&d, x) > 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-3.c.jj        2021-12-14 10:34:31.544573270 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-3.c   2021-12-14 10:34:46.086365265 +0100
> @@ -0,0 +1,207 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*addq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*addl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*addw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*addb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f17 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) == 0;
> +}
> +
> +int
> +f18 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) == 0;
> +}
> +
> +int
> +f19 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) == 0;
> +}
> +
> +int
> +f20 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) == 0;
> +}
> +
> +int
> +f21 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) != 0;
> +}
> +
> +int
> +f22 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) != 0;
> +}
> +
> +int
> +f23 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) != 0;
> +}
> +
> +int
> +f24 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) != 0;
> +}
> +
> +int
> +f25 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) < 0;
> +}
> +
> +int
> +f26 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) < 0;
> +}
> +
> +int
> +f27 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) < 0;
> +}
> +
> +int
> +f28 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) < 0;
> +}
> +
> +int
> +f29 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) >= 0;
> +}
> +
> +int
> +f30 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) >= 0;
> +}
> +
> +int
> +f31 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) >= 0;
> +}
> +
> +int
> +f32 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) >= 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-4.c.jj        2021-12-14 10:34:55.005237694 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-4.c   2021-12-14 10:36:54.492528580 +0100
> @@ -0,0 +1,111 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addq\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addl\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addw\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\rx]\*addb\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_add_fetch (&a, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_add_fetch (&b, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_add_fetch (&c, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_add_fetch (&d, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) <= 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) <= 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) <= 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) <= 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __sync_add_and_fetch (&a, x) > 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __sync_add_and_fetch (&b, x) > 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __sync_add_and_fetch (&c, x) > 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __sync_add_and_fetch (&d, x) > 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-5.c.jj        2021-12-14 10:39:26.256357792 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-5.c   2021-12-14 10:39:22.027418280 +0100
> @@ -0,0 +1,303 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*andq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*andl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*andw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*andb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f17 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) == 0;
> +}
> +
> +int
> +f18 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) == 0;
> +}
> +
> +int
> +f19 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) == 0;
> +}
> +
> +int
> +f20 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) == 0;
> +}
> +
> +int
> +f21 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) != 0;
> +}
> +
> +int
> +f22 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) != 0;
> +}
> +
> +int
> +f23 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) != 0;
> +}
> +
> +int
> +f24 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) != 0;
> +}
> +
> +int
> +f25 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) < 0;
> +}
> +
> +int
> +f26 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) < 0;
> +}
> +
> +int
> +f27 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) < 0;
> +}
> +
> +int
> +f28 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) < 0;
> +}
> +
> +int
> +f29 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) >= 0;
> +}
> +
> +int
> +f30 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) >= 0;
> +}
> +
> +int
> +f31 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) >= 0;
> +}
> +
> +int
> +f32 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) >= 0;
> +}
> +
> +int
> +f33 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f34 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f35 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f36 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f37 (long x)
> +{
> +  return __atomic_and_fetch (&a, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f38 (int x)
> +{
> +  return __atomic_and_fetch (&b, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f39 (short x)
> +{
> +  return __atomic_and_fetch (&c, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f40 (char x)
> +{
> +  return __atomic_and_fetch (&d, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f41 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) <= 0;
> +}
> +
> +int
> +f42 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) <= 0;
> +}
> +
> +int
> +f43 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) <= 0;
> +}
> +
> +int
> +f44 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) <= 0;
> +}
> +
> +int
> +f45 (long x)
> +{
> +  return __sync_and_and_fetch (&a, x) > 0;
> +}
> +
> +int
> +f46 (int x)
> +{
> +  return __sync_and_and_fetch (&b, x) > 0;
> +}
> +
> +int
> +f47 (short x)
> +{
> +  return __sync_and_and_fetch (&c, x) > 0;
> +}
> +
> +int
> +f48 (char x)
> +{
> +  return __sync_and_and_fetch (&d, x) > 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-6.c.jj        2021-12-14 10:39:40.076160115 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-6.c   2021-12-14 10:40:15.013660380 +0100
> @@ -0,0 +1,303 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*orq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*orl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*orw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*orb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f17 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) == 0;
> +}
> +
> +int
> +f18 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) == 0;
> +}
> +
> +int
> +f19 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) == 0;
> +}
> +
> +int
> +f20 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) == 0;
> +}
> +
> +int
> +f21 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) != 0;
> +}
> +
> +int
> +f22 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) != 0;
> +}
> +
> +int
> +f23 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) != 0;
> +}
> +
> +int
> +f24 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) != 0;
> +}
> +
> +int
> +f25 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) < 0;
> +}
> +
> +int
> +f26 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) < 0;
> +}
> +
> +int
> +f27 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) < 0;
> +}
> +
> +int
> +f28 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) < 0;
> +}
> +
> +int
> +f29 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) >= 0;
> +}
> +
> +int
> +f30 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) >= 0;
> +}
> +
> +int
> +f31 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) >= 0;
> +}
> +
> +int
> +f32 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) >= 0;
> +}
> +
> +int
> +f33 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f34 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f35 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f36 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f37 (long x)
> +{
> +  return __atomic_or_fetch (&a, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f38 (int x)
> +{
> +  return __atomic_or_fetch (&b, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f39 (short x)
> +{
> +  return __atomic_or_fetch (&c, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f40 (char x)
> +{
> +  return __atomic_or_fetch (&d, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f41 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) <= 0;
> +}
> +
> +int
> +f42 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) <= 0;
> +}
> +
> +int
> +f43 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) <= 0;
> +}
> +
> +int
> +f44 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) <= 0;
> +}
> +
> +int
> +f45 (long x)
> +{
> +  return __sync_or_and_fetch (&a, x) > 0;
> +}
> +
> +int
> +f46 (int x)
> +{
> +  return __sync_or_and_fetch (&b, x) > 0;
> +}
> +
> +int
> +f47 (short x)
> +{
> +  return __sync_or_and_fetch (&c, x) > 0;
> +}
> +
> +int
> +f48 (char x)
> +{
> +  return __sync_or_and_fetch (&d, x) > 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr98737-7.c.jj        2021-12-14 10:40:23.587537740 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98737-7.c   2021-12-14 10:40:59.445024845 +0100
> @@ -0,0 +1,303 @@
> +/* PR target/98737 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xorq\t" { target lp64 } } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xorl\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xorw\t" } } */
> +/* { dg-final { scan-assembler "lock\[^\n\r]\*xorb\t" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
> +/* { dg-final { scan-assembler-not "lock\[^\n\r]\*cmpxchg" } } */
> +
> +long a;
> +int b;
> +short c;
> +char d;
> +
> +int
> +f1 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f3 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f4 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) == 0;
> +}
> +
> +int
> +f5 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f6 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f7 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f8 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) != 0;
> +}
> +
> +int
> +f9 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f10 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f11 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f12 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) < 0;
> +}
> +
> +int
> +f13 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f14 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f15 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f16 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) >= 0;
> +}
> +
> +int
> +f17 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) == 0;
> +}
> +
> +int
> +f18 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) == 0;
> +}
> +
> +int
> +f19 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) == 0;
> +}
> +
> +int
> +f20 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) == 0;
> +}
> +
> +int
> +f21 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) != 0;
> +}
> +
> +int
> +f22 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) != 0;
> +}
> +
> +int
> +f23 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) != 0;
> +}
> +
> +int
> +f24 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) != 0;
> +}
> +
> +int
> +f25 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) < 0;
> +}
> +
> +int
> +f26 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) < 0;
> +}
> +
> +int
> +f27 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) < 0;
> +}
> +
> +int
> +f28 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) < 0;
> +}
> +
> +int
> +f29 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) >= 0;
> +}
> +
> +int
> +f30 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) >= 0;
> +}
> +
> +int
> +f31 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) >= 0;
> +}
> +
> +int
> +f32 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) >= 0;
> +}
> +
> +int
> +f33 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f34 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f35 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f36 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) <= 0;
> +}
> +
> +int
> +f37 (long x)
> +{
> +  return __atomic_xor_fetch (&a, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f38 (int x)
> +{
> +  return __atomic_xor_fetch (&b, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f39 (short x)
> +{
> +  return __atomic_xor_fetch (&c, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f40 (char x)
> +{
> +  return __atomic_xor_fetch (&d, x, __ATOMIC_RELEASE) > 0;
> +}
> +
> +int
> +f41 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) <= 0;
> +}
> +
> +int
> +f42 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) <= 0;
> +}
> +
> +int
> +f43 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) <= 0;
> +}
> +
> +int
> +f44 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) <= 0;
> +}
> +
> +int
> +f45 (long x)
> +{
> +  return __sync_xor_and_fetch (&a, x) > 0;
> +}
> +
> +int
> +f46 (int x)
> +{
> +  return __sync_xor_and_fetch (&b, x) > 0;
> +}
> +
> +int
> +f47 (short x)
> +{
> +  return __sync_xor_and_fetch (&c, x) > 0;
> +}
> +
> +int
> +f48 (char x)
> +{
> +  return __sync_xor_and_fetch (&d, x) > 0;
> +}
>
>
>         Jakub
>



Thread overview: 6+ messages
-- links below jump to the message on this page --
2021-01-27  9:20 [PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737] Jakub Jelinek
2021-01-27 10:22 ` Uros Bizjak
2021-01-27 10:37   ` Jakub Jelinek
2021-01-27 11:27     ` Ulrich Drepper
2021-12-15  9:22       ` [PATCH] i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} " Jakub Jelinek
2021-12-15 10:54         ` Uros Bizjak
