public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] ifcvt: Fix up noce_convert_multiple_sets [PR106590]
@ 2022-08-15 10:47 Jakub Jelinek
  2022-08-15 11:29 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2022-08-15 10:47 UTC (permalink / raw)
  To: Richard Biener, Jeff Law; +Cc: gcc-patches

Hi!

The following testcase is miscompiled on x86_64-linux.
The problem is in the noce_convert_multiple_sets optimization.
We essentially have:
if (g == 1)
  {
    g = 1;
    f = 23;
  }
else
  {
    g = 2;
    f = 20;
  }
and for each insn try to create a conditional move sequence.
There is code to detect overlap with the regs used in the condition
and the destinations, so we actually try to construct:
tmp_g = g == 1 ? 1 : 2;
f = g == 1 ? 23 : 20;
g = tmp_g;
which is fine.  But, we actually try to create two different
conditional move sequences in each case, seq1 with the whole
(eq (reg/v:HI 82 [ g ]) (const_int 1 [0x1]))
condition and seq2 with cc_cmp
(eq (reg:CCZ 17 flags) (const_int 0 [0]))
to rely on the earlier present comparison.  In each case, we
compare the rtx costs and choose the cheaper sequence (seq1 if both
have the same cost).
The problem is that with the skylake tuning,
tmp_g = g == 1 ? 1 : 2;
is actually expanded as
tmp_g = (g == 1) + 1;
in seq1 (which clobbers (reg 17 flags)) and as a cmov in seq2
(which doesn't).  The tuning says both have the same cost, so we
pick seq1.  Next we check sequences for
f = g == 1 ? 23 : 20; and here the seq2 cmov is cheaper, but it
uses (reg 17 flags) which has been clobbered earlier.

The following patch fixes that by detecting if we in the chosen
sequence clobber some register mentioned in cc_cmp or rev_cc_cmp,
and if yes, arranges for only seq1 (i.e. sequences that emit the
comparison itself) to be used after that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-08-15  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/106590
	* ifcvt.cc (check_for_cc_cmp_clobbers): New function.
	(noce_convert_multiple_sets_1): If SEQ sets or clobbers any regs
	mentioned in cc_cmp or rev_cc_cmp, don't consider seq2 for any
	further conditional moves.

	* gcc.dg/torture/pr106590.c: New test.

--- gcc/ifcvt.cc.jj	2022-07-26 10:32:23.000000000 +0200
+++ gcc/ifcvt.cc	2022-08-12 16:31:45.348151269 +0200
@@ -3369,6 +3369,20 @@ noce_convert_multiple_sets (struct noce_
   return TRUE;
 }
 
+/* Helper function for noce_convert_multiple_sets_1.  If store to
+   DEST can affect P[0] or P[1], clear P[0].  Called via note_stores.  */
+
+static void
+check_for_cc_cmp_clobbers (rtx dest, const_rtx, void *p0)
+{
+  rtx *p = (rtx *) p0;
+  if (p[0] == NULL_RTX)
+    return;
+  if (reg_overlap_mentioned_p (dest, p[0])
+      || (p[1] && reg_overlap_mentioned_p (dest, p[1])))
+    p[0] = NULL_RTX;
+}
+
 /* This goes through all relevant insns of IF_INFO->then_bb and tries to
    create conditional moves.  In case a simple move sufficis the insn
    should be listed in NEED_NO_CMOV.  The rewired-src cases should be
@@ -3519,7 +3533,7 @@ noce_convert_multiple_sets_1 (struct noc
 
 	 as min/max and emit an insn, accordingly.  */
       unsigned cost1 = 0, cost2 = 0;
-      rtx_insn *seq, *seq1, *seq2;
+      rtx_insn *seq, *seq1, *seq2 = NULL;
       rtx temp_dest = NULL_RTX, temp_dest1 = NULL_RTX, temp_dest2 = NULL_RTX;
       bool read_comparison = false;
 
@@ -3531,9 +3545,10 @@ noce_convert_multiple_sets_1 (struct noc
 	 as well.  This allows the backend to emit a cmov directly without
 	 creating an additional compare for each.  If successful, costing
 	 is easier and this sequence is usually preferred.  */
-      seq2 = try_emit_cmove_seq (if_info, temp, cond,
-				 new_val, old_val, need_cmov,
-				 &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
+      if (cc_cmp)
+	seq2 = try_emit_cmove_seq (if_info, temp, cond,
+				   new_val, old_val, need_cmov,
+				   &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
 
       /* The backend might have created a sequence that uses the
 	 condition.  Check this.  */
@@ -3588,6 +3603,24 @@ noce_convert_multiple_sets_1 (struct noc
 	  return FALSE;
 	}
 
+      if (cc_cmp)
+	{
+	  /* Check if SEQ can clobber registers mentioned in
+	     cc_cmp and/or rev_cc_cmp.  If yes, we need to use
+	     only seq1 from that point on.  */
+	  rtx cc_cmp_pair[2] = { cc_cmp, rev_cc_cmp };
+	  for (walk = seq; walk; walk = NEXT_INSN (walk))
+	    {
+	      note_stores (walk, check_for_cc_cmp_clobbers, cc_cmp_pair);
+	      if (cc_cmp_pair[0] == NULL_RTX)
+		{
+		  cc_cmp = NULL_RTX;
+		  rev_cc_cmp = NULL_RTX;
+		  break;
+		}
+	    }
+	}
+
       /* End the sub sequence and emit to the main sequence.  */
       emit_insn (seq);
 
--- gcc/testsuite/gcc.dg/torture/pr106590.c.jj	2022-08-12 16:39:33.931965959 +0200
+++ gcc/testsuite/gcc.dg/torture/pr106590.c	2022-08-12 16:39:17.752179521 +0200
@@ -0,0 +1,75 @@
+/* PR rtl-optimization/106590 } */
+/* { dg-do run } */
+/* { dg-additional-options "-mtune=skylake" { target { i?86-*-* x86_64-*-* } } } */
+
+typedef struct A { short a; } A;
+typedef A *B;
+typedef struct C { int c, d; } C;
+typedef C *D;
+
+B
+foo (void)
+{
+  static A r = { .a = 1 };
+  return &r;
+}
+
+D
+bar (void)
+{
+  static C r = { .c = 1, .d = 23 };
+  return &r;
+}
+
+static inline int __attribute__((always_inline))
+baz (short a)
+{
+  int e = 1, f;
+  short g;
+  D h;
+
+  switch (a)
+    {
+    case 1:
+      f = 23;
+      g = 1;
+      break;
+    case 2:
+      f = 20;
+      g = 2;
+      break;
+    }
+
+  h = bar ();
+
+  if (h->d != f || h->c != g)
+    __builtin_abort ();
+  return e;
+}
+
+int
+qux (void)
+{
+  B i = foo ();
+  int e = 1;
+
+  switch (i->a)
+    {
+    case 1:
+    case 2:
+      e = baz (i->a);
+      break;
+    case 3:
+      e = 0;
+      break;
+    }
+
+  return e;
+}
+
+int
+main ()
+{
+  qux ();
+  return 0;
+}

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] ifcvt: Fix up noce_convert_multiple_sets [PR106590]
  2022-08-15 10:47 [PATCH] ifcvt: Fix up noce_convert_multiple_sets [PR106590] Jakub Jelinek
@ 2022-08-15 11:29 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2022-08-15 11:29 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Mon, 15 Aug 2022, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled on x86_64-linux.
> The problem is in the noce_convert_multiple_sets optimization.
> We essentially have:
> if (g == 1)
>   {
>     g = 1;
>     f = 23;
>   }
> else
>   {
>     g = 2;
>     f = 20;
>   }
> and for each insn try to create a conditional move sequence.
> There is code to detect overlap with the regs used in the condition
> and the destinations, so we actually try to construct:
> tmp_g = g == 1 ? 1 : 2;
> f = g == 1 ? 23 : 20;
> g = tmp_g;
> which is fine.  But, we actually try to create two different
> conditional move sequences in each case, seq1 with the whole
> (eq (reg/v:HI 82 [ g ]) (const_int 1 [0x1]))
> condition and seq2 with cc_cmp
> (eq (reg:CCZ 17 flags) (const_int 0 [0]))
> to rely on the earlier present comparison.  In each case, we
> compare the rtx costs and choose the cheaper sequence (seq1 if both
> have the same cost).
> The problem is that with the skylake tuning,
> tmp_g = g == 1 ? 1 : 2;
> is actually expanded as
> tmp_g = (g == 1) + 1;
> in seq1 (which clobbers (reg 17 flags)) and as a cmov in seq2
> (which doesn't).  The tuning says both have the same cost, so we
> pick seq1.  Next we check sequences for
> f = g == 1 ? 23 : 20; and here the seq2 cmov is cheaper, but it
> uses (reg 17 flags) which has been clobbered earlier.
> 
> The following patch fixes that by detecting if we in the chosen
> sequence clobber some register mentioned in cc_cmp or rev_cc_cmp,
> and if yes, arranges for only seq1 (i.e. sequences that emit the
> comparison itself) to be used after that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2022-08-15  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR rtl-optimization/106590
> 	* ifcvt.cc (check_for_cc_cmp_clobbers): New function.
> 	(noce_convert_multiple_sets_1): If SEQ sets or clobbers any regs
> 	mentioned in cc_cmp or rev_cc_cmp, don't consider seq2 for any
> 	further conditional moves.
> 
> 	* gcc.dg/torture/pr106590.c: New test.
> 
> --- gcc/ifcvt.cc.jj	2022-07-26 10:32:23.000000000 +0200
> +++ gcc/ifcvt.cc	2022-08-12 16:31:45.348151269 +0200
> @@ -3369,6 +3369,20 @@ noce_convert_multiple_sets (struct noce_
>    return TRUE;
>  }
>  
> +/* Helper function for noce_convert_multiple_sets_1.  If store to
> +   DEST can affect P[0] or P[1], clear P[0].  Called via note_stores.  */
> +
> +static void
> +check_for_cc_cmp_clobbers (rtx dest, const_rtx, void *p0)
> +{
> +  rtx *p = (rtx *) p0;
> +  if (p[0] == NULL_RTX)
> +    return;
> +  if (reg_overlap_mentioned_p (dest, p[0])
> +      || (p[1] && reg_overlap_mentioned_p (dest, p[1])))
> +    p[0] = NULL_RTX;
> +}
> +
>  /* This goes through all relevant insns of IF_INFO->then_bb and tries to
>     create conditional moves.  In case a simple move sufficis the insn
>     should be listed in NEED_NO_CMOV.  The rewired-src cases should be
> @@ -3519,7 +3533,7 @@ noce_convert_multiple_sets_1 (struct noc
>  
>  	 as min/max and emit an insn, accordingly.  */
>        unsigned cost1 = 0, cost2 = 0;
> -      rtx_insn *seq, *seq1, *seq2;
> +      rtx_insn *seq, *seq1, *seq2 = NULL;
>        rtx temp_dest = NULL_RTX, temp_dest1 = NULL_RTX, temp_dest2 = NULL_RTX;
>        bool read_comparison = false;
>  
> @@ -3531,9 +3545,10 @@ noce_convert_multiple_sets_1 (struct noc
>  	 as well.  This allows the backend to emit a cmov directly without
>  	 creating an additional compare for each.  If successful, costing
>  	 is easier and this sequence is usually preferred.  */
> -      seq2 = try_emit_cmove_seq (if_info, temp, cond,
> -				 new_val, old_val, need_cmov,
> -				 &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
> +      if (cc_cmp)
> +	seq2 = try_emit_cmove_seq (if_info, temp, cond,
> +				   new_val, old_val, need_cmov,
> +				   &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
>  
>        /* The backend might have created a sequence that uses the
>  	 condition.  Check this.  */
> @@ -3588,6 +3603,24 @@ noce_convert_multiple_sets_1 (struct noc
>  	  return FALSE;
>  	}
>  
> +      if (cc_cmp)
> +	{
> +	  /* Check if SEQ can clobber registers mentioned in
> +	     cc_cmp and/or rev_cc_cmp.  If yes, we need to use
> +	     only seq1 from that point on.  */
> +	  rtx cc_cmp_pair[2] = { cc_cmp, rev_cc_cmp };
> +	  for (walk = seq; walk; walk = NEXT_INSN (walk))
> +	    {
> +	      note_stores (walk, check_for_cc_cmp_clobbers, cc_cmp_pair);
> +	      if (cc_cmp_pair[0] == NULL_RTX)
> +		{
> +		  cc_cmp = NULL_RTX;
> +		  rev_cc_cmp = NULL_RTX;
> +		  break;
> +		}
> +	    }
> +	}
> +
>        /* End the sub sequence and emit to the main sequence.  */
>        emit_insn (seq);
>  
> --- gcc/testsuite/gcc.dg/torture/pr106590.c.jj	2022-08-12 16:39:33.931965959 +0200
> +++ gcc/testsuite/gcc.dg/torture/pr106590.c	2022-08-12 16:39:17.752179521 +0200
> @@ -0,0 +1,75 @@
> +/* PR rtl-optimization/106590 } */
> +/* { dg-do run } */
> +/* { dg-additional-options "-mtune=skylake" { target { i?86-*-* x86_64-*-* } } } */
> +
> +typedef struct A { short a; } A;
> +typedef A *B;
> +typedef struct C { int c, d; } C;
> +typedef C *D;
> +
> +B
> +foo (void)
> +{
> +  static A r = { .a = 1 };
> +  return &r;
> +}
> +
> +D
> +bar (void)
> +{
> +  static C r = { .c = 1, .d = 23 };
> +  return &r;
> +}
> +
> +static inline int __attribute__((always_inline))
> +baz (short a)
> +{
> +  int e = 1, f;
> +  short g;
> +  D h;
> +
> +  switch (a)
> +    {
> +    case 1:
> +      f = 23;
> +      g = 1;
> +      break;
> +    case 2:
> +      f = 20;
> +      g = 2;
> +      break;
> +    }
> +
> +  h = bar ();
> +
> +  if (h->d != f || h->c != g)
> +    __builtin_abort ();
> +  return e;
> +}
> +
> +int
> +qux (void)
> +{
> +  B i = foo ();
> +  int e = 1;
> +
> +  switch (i->a)
> +    {
> +    case 1:
> +    case 2:
> +      e = baz (i->a);
> +      break;
> +    case 3:
> +      e = 0;
> +      break;
> +    }
> +
> +  return e;
> +}
> +
> +int
> +main ()
> +{
> +  qux ();
> +  return 0;
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-08-15 11:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-15 10:47 [PATCH] ifcvt: Fix up noce_convert_multiple_sets [PR106590] Jakub Jelinek
2022-08-15 11:29 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).