public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
@ 2021-01-15 18:32 Jakub Jelinek
  2021-01-15 19:48 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-01-15 18:32 UTC (permalink / raw)
  To: Richard Biener, Jeff Law; +Cc: gcc-patches

Hi!

On the following testcase, handle_builtin_memcmp in the strlen pass folds
the memcmp into comparison of two MEM_REFs.  But nothing triggers updating
of addressable vars afterwards, so even when the parameters are no longer
address taken, we force the parameters to stack and back anyway.

The following patch fixes that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-01-15  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/96271
	* tree-ssa-strlen.c (handle_builtin_memcmp): Add UPDATE_ADDRESS_TAKEN
	argument, set what it points to to true if optimizing memcmp into
	a simple MEM_REF comparison.
	(strlen_check_and_optimize_call): Add UPDATE_ADDRESS_TAKEN argument
	and pass it through to handle_builtin_memcmp.
	(check_and_optimize_stmt): Add UPDATE_ADDRESS_TAKEN argument
	and pass it through to strlen_check_and_optimize_call.
	(strlen_dom_walker): Replace m_cleanup_cfg with todo.
	(strlen_dom_walker::before_dom_children): Adjust for the above change,
	adjust check_and_optimize_stmt caller and or in into todo
	TODO_cleanup_cfg and/or TODO_update_address_taken.
	(printf_strlen_execute): Return todo instead of conditionally
	TODO_cleanup_cfg.

	* gcc.target/i386/pr96271.c: New test.

--- gcc/tree-ssa-strlen.c.jj	2021-01-04 10:25:40.000000000 +0100
+++ gcc/tree-ssa-strlen.c	2021-01-15 15:13:09.847839781 +0100
@@ -3813,7 +3813,7 @@ use_in_zero_equality (tree res, bool exc
    return true when call is transformed, return false otherwise.  */
 
 static bool
-handle_builtin_memcmp (gimple_stmt_iterator *gsi)
+handle_builtin_memcmp (gimple_stmt_iterator *gsi, bool *update_address_taken)
 {
   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
   tree res = gimple_call_lhs (stmt);
@@ -3858,6 +3858,7 @@ handle_builtin_memcmp (gimple_stmt_itera
 						   boolean_type_node,
 						   arg1, arg2));
 	  gimplify_and_update_call_from_tree (gsi, res);
+	  *update_address_taken = true;
 	  return true;
 	}
     }
@@ -5110,7 +5111,7 @@ is_char_type (tree type)
 
 static bool
 strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write,
-				pointer_query &ptr_qry)
+				bool *update_address_taken, pointer_query &ptr_qry)
 {
   gimple *stmt = gsi_stmt (*gsi);
 
@@ -5179,7 +5180,7 @@ strlen_check_and_optimize_call (gimple_s
 	return false;
       break;
     case BUILT_IN_MEMCMP:
-      if (handle_builtin_memcmp (gsi))
+      if (handle_builtin_memcmp (gsi, update_address_taken))
 	return false;
       break;
     case BUILT_IN_STRCMP:
@@ -5341,12 +5342,13 @@ handle_integral_assign (gimple_stmt_iter
 /* Attempt to check for validity of the performed access a single statement
    at *GSI using string length knowledge, and to optimize it.
    If the given basic block needs clean-up of EH, CLEANUP_EH is set to
-   true.  Return true to let the caller advance *GSI to the next statement
-   in the basic block and false otherwise.  */
+   true.  If it is to update addressables at the end of the pass, set
+   *UPDATE_ADDRESS_TAKEN to true.  Return true to let the caller advance *GSI
+   to the next statement in the basic block and false otherwise.  */
 
 static bool
 check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
-			 pointer_query &ptr_qry)
+			 bool *update_address_taken, pointer_query &ptr_qry)
 {
   gimple *stmt = gsi_stmt (*gsi);
 
@@ -5356,7 +5358,8 @@ check_and_optimize_stmt (gimple_stmt_ite
 
   if (is_gimple_call (stmt))
     {
-      if (!strlen_check_and_optimize_call (gsi, &zero_write, ptr_qry))
+      if (!strlen_check_and_optimize_call (gsi, &zero_write,
+					   update_address_taken, ptr_qry))
 	return false;
     }
   else if (!flag_optimize_strlen || !strlen_optimize)
@@ -5488,7 +5491,7 @@ public:
     evrp (false),
     ptr_qry (&evrp, &var_cache),
     var_cache (),
-    m_cleanup_cfg (false)
+    todo (0)
   { }
 
   virtual edge before_dom_children (basic_block);
@@ -5503,9 +5506,8 @@ public:
   pointer_query ptr_qry;
   pointer_query::cache_type var_cache;
 
-  /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
-     execute function.  */
-  bool m_cleanup_cfg;
+  /* TODO_* flags for the pass.  */
+  int todo;
 };
 
 /* Callback for walk_dominator_tree.  Attempt to optimize various
@@ -5586,6 +5588,7 @@ strlen_dom_walker::before_dom_children (
     }
 
   bool cleanup_eh = false;
+  bool update_address_taken = false;
 
   /* Attempt to optimize individual statements.  */
   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
@@ -5599,12 +5602,15 @@ strlen_dom_walker::before_dom_children (
       /* Reset search depth preformance counter.  */
       ptr_qry.depth = 0;
 
-      if (check_and_optimize_stmt (&gsi, &cleanup_eh, ptr_qry))
+      if (check_and_optimize_stmt (&gsi, &cleanup_eh, &update_address_taken,
+				   ptr_qry))
 	gsi_next (&gsi);
     }
 
   if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
-      m_cleanup_cfg = true;
+    todo |= TODO_cleanup_cfg;
+  if (update_address_taken)
+    todo |= TODO_update_address_taken;
 
   bb->aux = stridx_to_strinfo;
   if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
@@ -5715,7 +5721,7 @@ printf_strlen_execute (function *fun, bo
       loop_optimizer_finalize ();
     }
 
-  return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
+  return walker.todo;
 }
 
 /* This file defines two passes: one for warnings that runs only when
--- gcc/testsuite/gcc.target/i386/pr96271.c.jj	2021-01-15 15:17:42.001780947 +0100
+++ gcc/testsuite/gcc.target/i386/pr96271.c	2021-01-15 15:17:25.447967002 +0100
@@ -0,0 +1,11 @@
+/* PR tree-optimization/96271 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=intel -msse2 -masm=att" } */
+/* { dg-final { scan-assembler "movq\t%xmm0, %r" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "movq\t%xmm1, %r" { target { ! ia32 } } } } */
+
+int
+foo (double a, double b)
+{
+  return __builtin_memcmp (&a, &b, sizeof (double)) == 0;
+}

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 18:32 [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271] Jakub Jelinek
@ 2021-01-15 19:48 ` Richard Biener
  2021-01-15 19:57   ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-01-15 19:48 UTC (permalink / raw)
  To: Jakub Jelinek, Jeff Law; +Cc: gcc-patches

On January 15, 2021 7:32:35 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>On the following testcase, handle_builtin_memcmp in the strlen pass
>folds
>the memcmp into comparison of two MEM_REFs.  But nothing triggers
>updating
>of addressable vars afterwards, so even when the parameters are no
>longer
>address taken, we force the parameters to stack and back anyway.
>
>The following patch fixes that.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Hmm, we're currently doing it unconditionally at strategic points in the pass pipeline. Is there no such point after the pass?

>2021-01-15  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/96271
>	* tree-ssa-strlen.c (handle_builtin_memcmp): Add UPDATE_ADDRESS_TAKEN
>	argument, set what it points to to true if optimizing memcmp into
>	a simple MEM_REF comparison.
>	(strlen_check_and_optimize_call): Add UPDATE_ADDRESS_TAKEN argument
>	and pass it through to handle_builtin_memcmp.
>	(check_and_optimize_stmt): Add UPDATE_ADDRESS_TAKEN argument
>	and pass it through to strlen_check_and_optimize_call.
>	(strlen_dom_walker): Replace m_cleanup_cfg with todo.
>	(strlen_dom_walker::before_dom_children): Adjust for the above change,
>	adjust check_and_optimize_stmt caller and or in into todo
>	TODO_cleanup_cfg and/or TODO_update_address_taken.
>	(printf_strlen_execute): Return todo instead of conditionally
>	TODO_cleanup_cfg.
>
>	* gcc.target/i386/pr96271.c: New test.
>
>--- gcc/tree-ssa-strlen.c.jj	2021-01-04 10:25:40.000000000 +0100
>+++ gcc/tree-ssa-strlen.c	2021-01-15 15:13:09.847839781 +0100
>@@ -3813,7 +3813,7 @@ use_in_zero_equality (tree res, bool exc
>    return true when call is transformed, return false otherwise.  */
> 
> static bool
>-handle_builtin_memcmp (gimple_stmt_iterator *gsi)
>+handle_builtin_memcmp (gimple_stmt_iterator *gsi, bool
>*update_address_taken)
> {
>   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
>   tree res = gimple_call_lhs (stmt);
>@@ -3858,6 +3858,7 @@ handle_builtin_memcmp (gimple_stmt_itera
> 						   boolean_type_node,
> 						   arg1, arg2));
> 	  gimplify_and_update_call_from_tree (gsi, res);
>+	  *update_address_taken = true;
> 	  return true;
> 	}
>     }
>@@ -5110,7 +5111,7 @@ is_char_type (tree type)
> 
> static bool
>strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool
>*zero_write,
>-				pointer_query &ptr_qry)
>+				bool *update_address_taken, pointer_query &ptr_qry)
> {
>   gimple *stmt = gsi_stmt (*gsi);
> 
>@@ -5179,7 +5180,7 @@ strlen_check_and_optimize_call (gimple_s
> 	return false;
>       break;
>     case BUILT_IN_MEMCMP:
>-      if (handle_builtin_memcmp (gsi))
>+      if (handle_builtin_memcmp (gsi, update_address_taken))
> 	return false;
>       break;
>     case BUILT_IN_STRCMP:
>@@ -5341,12 +5342,13 @@ handle_integral_assign (gimple_stmt_iter
>/* Attempt to check for validity of the performed access a single
>statement
>    at *GSI using string length knowledge, and to optimize it.
>    If the given basic block needs clean-up of EH, CLEANUP_EH is set to
>-   true.  Return true to let the caller advance *GSI to the next
>statement
>-   in the basic block and false otherwise.  */
>+   true.  If it is to update addressables at the end of the pass, set
>+   *UPDATE_ADDRESS_TAKEN to true.  Return true to let the caller
>advance *GSI
>+   to the next statement in the basic block and false otherwise.  */
> 
> static bool
> check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
>-			 pointer_query &ptr_qry)
>+			 bool *update_address_taken, pointer_query &ptr_qry)
> {
>   gimple *stmt = gsi_stmt (*gsi);
> 
>@@ -5356,7 +5358,8 @@ check_and_optimize_stmt (gimple_stmt_ite
> 
>   if (is_gimple_call (stmt))
>     {
>-      if (!strlen_check_and_optimize_call (gsi, &zero_write, ptr_qry))
>+      if (!strlen_check_and_optimize_call (gsi, &zero_write,
>+					   update_address_taken, ptr_qry))
> 	return false;
>     }
>   else if (!flag_optimize_strlen || !strlen_optimize)
>@@ -5488,7 +5491,7 @@ public:
>     evrp (false),
>     ptr_qry (&evrp, &var_cache),
>     var_cache (),
>-    m_cleanup_cfg (false)
>+    todo (0)
>   { }
> 
>   virtual edge before_dom_children (basic_block);
>@@ -5503,9 +5506,8 @@ public:
>   pointer_query ptr_qry;
>   pointer_query::cache_type var_cache;
> 
>-  /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
>-     execute function.  */
>-  bool m_cleanup_cfg;
>+  /* TODO_* flags for the pass.  */
>+  int todo;
> };
> 
> /* Callback for walk_dominator_tree.  Attempt to optimize various
>@@ -5586,6 +5588,7 @@ strlen_dom_walker::before_dom_children (
>     }
> 
>   bool cleanup_eh = false;
>+  bool update_address_taken = false;
> 
>   /* Attempt to optimize individual statements.  */
> for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
>@@ -5599,12 +5602,15 @@ strlen_dom_walker::before_dom_children (
>       /* Reset search depth preformance counter.  */
>       ptr_qry.depth = 0;
> 
>-      if (check_and_optimize_stmt (&gsi, &cleanup_eh, ptr_qry))
>+      if (check_and_optimize_stmt (&gsi, &cleanup_eh,
>&update_address_taken,
>+				   ptr_qry))
> 	gsi_next (&gsi);
>     }
> 
>   if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
>-      m_cleanup_cfg = true;
>+    todo |= TODO_cleanup_cfg;
>+  if (update_address_taken)
>+    todo |= TODO_update_address_taken;
> 
>   bb->aux = stridx_to_strinfo;
>   if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
>@@ -5715,7 +5721,7 @@ printf_strlen_execute (function *fun, bo
>       loop_optimizer_finalize ();
>     }
> 
>-  return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
>+  return walker.todo;
> }
> 
> /* This file defines two passes: one for warnings that runs only when
>--- gcc/testsuite/gcc.target/i386/pr96271.c.jj	2021-01-15
>15:17:42.001780947 +0100
>+++ gcc/testsuite/gcc.target/i386/pr96271.c	2021-01-15
>15:17:25.447967002 +0100
>@@ -0,0 +1,11 @@
>+/* PR tree-optimization/96271 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -mtune=intel -msse2 -masm=att" } */
>+/* { dg-final { scan-assembler "movq\t%xmm0, %r" { target { ! ia32 } }
>} } */
>+/* { dg-final { scan-assembler "movq\t%xmm1, %r" { target { ! ia32 } }
>} } */
>+
>+int
>+foo (double a, double b)
>+{
>+  return __builtin_memcmp (&a, &b, sizeof (double)) == 0;
>+}
>
>	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 19:48 ` Richard Biener
@ 2021-01-15 19:57   ` Jakub Jelinek
  2021-01-15 20:16     ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-01-15 19:57 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Fri, Jan 15, 2021 at 08:48:32PM +0100, Richard Biener wrote:
> On January 15, 2021 7:32:35 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
> >Hi!
> >
> >On the following testcase, handle_builtin_memcmp in the strlen pass
> >folds
> >the memcmp into comparison of two MEM_REFs.  But nothing triggers
> >updating
> >of addressable vars afterwards, so even when the parameters are no
> >longer
> >address taken, we force the parameters to stack and back anyway.
> >
> >The following patch fixes that.
> >
> >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Hmm, we're currently doing it unconditionally at strategic points in the pass pipeline. Is there no such point after the pass?

Apparently not.  The passes after strlen1 are:
pr96271.c.191t.thread4
pr96271.c.192t.vrp2
pr96271.c.193t.copyprop5
pr96271.c.194t.wrestrict
pr96271.c.195t.dse4
pr96271.c.196t.cddce3
pr96271.c.197t.forwprop4
pr96271.c.198t.phiopt4
pr96271.c.199t.fab1
pr96271.c.200t.widening_mul
pr96271.c.201t.store-merging
pr96271.c.202t.tailc
pr96271.c.203t.dce7
pr96271.c.204t.crited1
pr96271.c.206t.uncprop1
pr96271.c.207t.local-pure-const2
pr96271.c.208t.modref2
pr96271.c.242t.nrv
pr96271.c.243t.isel
pr96271.c.244t.optimized
and TODO_update_address_taken is used by the inliner, sra, ccp, loop and
sccvn, so maybe in fre5 in 187.

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 19:57   ` Jakub Jelinek
@ 2021-01-15 20:16     ` Richard Biener
  2021-01-15 20:31       ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-01-15 20:16 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On January 15, 2021 8:57:39 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Fri, Jan 15, 2021 at 08:48:32PM +0100, Richard Biener wrote:
>> On January 15, 2021 7:32:35 PM GMT+01:00, Jakub Jelinek
><jakub@redhat.com> wrote:
>> >Hi!
>> >
>> >On the following testcase, handle_builtin_memcmp in the strlen pass
>> >folds
>> >the memcmp into comparison of two MEM_REFs.  But nothing triggers
>> >updating
>> >of addressable vars afterwards, so even when the parameters are no
>> >longer
>> >address taken, we force the parameters to stack and back anyway.
>> >
>> >The following patch fixes that.
>> >
>> >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> 
>> Hmm, we're currently doing it unconditionally at strategic points in
>the pass pipeline. Is there no such point after the pass?
>
>Apparently not.  The passes after strlen1 are:
>pr96271.c.191t.thread4
>pr96271.c.192t.vrp2
>pr96271.c.193t.copyprop5
>pr96271.c.194t.wrestrict
>pr96271.c.195t.dse4
>pr96271.c.196t.cddce3
>pr96271.c.197t.forwprop4
>pr96271.c.198t.phiopt4
>pr96271.c.199t.fab1
>pr96271.c.200t.widening_mul
>pr96271.c.201t.store-merging
>pr96271.c.202t.tailc
>pr96271.c.203t.dce7
>pr96271.c.204t.crited1
>pr96271.c.206t.uncprop1
>pr96271.c.207t.local-pure-const2
>pr96271.c.208t.modref2
>pr96271.c.242t.nrv
>pr96271.c.243t.isel
>pr96271.c.244t.optimized
>and TODO_update_address_taken is used by the inliner, sra, ccp, loop
>and
>sccvn, so maybe in fre5 in 187.

OK, I think it makes sense to delay until before forwprop4 so can you instead arrange for an unconditional run there? 

Thanks, 
Richard. 

>	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 20:16     ` Richard Biener
@ 2021-01-15 20:31       ` Jakub Jelinek
  2021-01-15 21:02         ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-01-15 20:31 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Fri, Jan 15, 2021 at 09:16:31PM +0100, Richard Biener wrote:
> >Apparently not.  The passes after strlen1 are:
> >pr96271.c.191t.thread4
> >pr96271.c.192t.vrp2
> >pr96271.c.193t.copyprop5
> >pr96271.c.194t.wrestrict
> >pr96271.c.195t.dse4
> >pr96271.c.196t.cddce3
> >pr96271.c.197t.forwprop4
> >pr96271.c.198t.phiopt4
> >pr96271.c.199t.fab1
> >pr96271.c.200t.widening_mul
> >pr96271.c.201t.store-merging
> >pr96271.c.202t.tailc
> >pr96271.c.203t.dce7
> >pr96271.c.204t.crited1
> >pr96271.c.206t.uncprop1
> >pr96271.c.207t.local-pure-const2
> >pr96271.c.208t.modref2
> >pr96271.c.242t.nrv
> >pr96271.c.243t.isel
> >pr96271.c.244t.optimized
> >and TODO_update_address_taken is used by the inliner, sra, ccp, loop
> >and
> >sccvn, so maybe in fre5 in 187.
> 
> OK, I think it makes sense to delay until before forwprop4 so can you instead arrange for an unconditional run there? 

At the end of forwprop4 or start (i.e. end of previous pass)?

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 20:31       ` Jakub Jelinek
@ 2021-01-15 21:02         ` Jakub Jelinek
  2021-01-16  1:00           ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-01-15 21:02 UTC (permalink / raw)
  To: Richard Biener, gcc-patches

On Fri, Jan 15, 2021 at 09:31:10PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Fri, Jan 15, 2021 at 09:16:31PM +0100, Richard Biener wrote:
> > >Apparently not.  The passes after strlen1 are:
> > >pr96271.c.191t.thread4
> > >pr96271.c.192t.vrp2
> > >pr96271.c.193t.copyprop5
> > >pr96271.c.194t.wrestrict
> > >pr96271.c.195t.dse4
> > >pr96271.c.196t.cddce3
> > >pr96271.c.197t.forwprop4
> > >pr96271.c.198t.phiopt4
> > >pr96271.c.199t.fab1
> > >pr96271.c.200t.widening_mul
> > >pr96271.c.201t.store-merging
> > >pr96271.c.202t.tailc
> > >pr96271.c.203t.dce7
> > >pr96271.c.204t.crited1
> > >pr96271.c.206t.uncprop1
> > >pr96271.c.207t.local-pure-const2
> > >pr96271.c.208t.modref2
> > >pr96271.c.242t.nrv
> > >pr96271.c.243t.isel
> > >pr96271.c.244t.optimized
> > >and TODO_update_address_taken is used by the inliner, sra, ccp, loop
> > >and
> > >sccvn, so maybe in fre5 in 187.
> > 
> > OK, I think it makes sense to delay until before forwprop4 so can you instead arrange for an unconditional run there? 
> 
> At the end of forwprop4 or start (i.e. end of previous pass)?

Like this?

2021-01-15  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/96271
	* passes.def: Pass false argument to first two pass_cd_dce
	instances and true to last instance.  Add comment that
	last instance rewrites no longer addressed locals.
	(pass_cd_dce): Add update_address_taken_p member and initialize it.
	(pass_cd_dce::set_pass_param): New method.
	(pass_cd_dce::execute): Return TODO_update_address_taken from
	last cd_dce instance.

	* gcc.target/i386/pr96271.c: New test.

--- gcc/passes.def.jj	2021-01-04 10:25:38.826233904 +0100
+++ gcc/passes.def	2021-01-15 21:55:24.431629359 +0100
@@ -90,7 +90,7 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_early_vrp);
 	  NEXT_PASS (pass_merge_phi);
           NEXT_PASS (pass_dse);
-	  NEXT_PASS (pass_cd_dce);
+	  NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
 	  NEXT_PASS (pass_phiopt, true /* early_p */);
 	  NEXT_PASS (pass_modref);
 	  NEXT_PASS (pass_tail_recursion);
@@ -272,7 +272,7 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_loop_jam);
 	  /* All unswitching, final value replacement and splitting can expose
 	     empty loops.  Remove them now.  */
-	  NEXT_PASS (pass_cd_dce);
+	  NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
 	  NEXT_PASS (pass_iv_canon);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_linterchange);
@@ -336,7 +336,9 @@ along with GCC; see the file COPYING3.
       NEXT_PASS (pass_copy_prop);
       NEXT_PASS (pass_warn_restrict);
       NEXT_PASS (pass_dse);
-      NEXT_PASS (pass_cd_dce);
+      NEXT_PASS (pass_cd_dce, true /* update_address_taken_p */);
+      /* After late CD DCE we rewrite no longer addressed locals into SSA
+	 form if possible.  */
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_fold_builtins);
--- gcc/tree-ssa-dce.c.jj	2021-01-12 11:01:51.283385011 +0100
+++ gcc/tree-ssa-dce.c	2021-01-15 21:54:19.016368431 +0100
@@ -1787,14 +1787,25 @@ class pass_cd_dce : public gimple_opt_pa
 {
 public:
   pass_cd_dce (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_cd_dce, ctxt)
+    : gimple_opt_pass (pass_data_cd_dce, ctxt), update_address_taken_p (false)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_cd_dce (m_ctxt); }
+  void set_pass_param (unsigned n, bool param)
+    {
+      gcc_assert (n == 0);
+      update_address_taken_p = param;
+    }
   virtual bool gate (function *) { return flag_tree_dce != 0; }
-  virtual unsigned int execute (function *) { return tree_ssa_cd_dce (); }
+  virtual unsigned int execute (function *)
+    {
+      return (tree_ssa_cd_dce ()
+	      | (update_address_taken_p ? TODO_update_address_taken : 0));
+    }
 
+private:
+  bool update_address_taken_p;
 }; // class pass_cd_dce
 
 } // anon namespace
--- gcc/testsuite/gcc.target/i386/pr96271.c.jj	2021-01-15 21:56:03.067192848 +0100
+++ gcc/testsuite/gcc.target/i386/pr96271.c	2021-01-15 21:56:03.067192848 +0100
@@ -0,0 +1,11 @@
+/* PR tree-optimization/96271 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=intel -msse2 -masm=att" } */
+/* { dg-final { scan-assembler "movq\t%xmm0, %r" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "movq\t%xmm1, %r" { target { ! ia32 } } } } */
+
+int
+foo (double a, double b)
+{
+  return __builtin_memcmp (&a, &b, sizeof (double)) == 0;
+}


	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-15 21:02         ` Jakub Jelinek
@ 2021-01-16  1:00           ` Jakub Jelinek
  2021-01-16  7:52             ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-01-16  1:00 UTC (permalink / raw)
  To: Richard Biener, gcc-patches

On Fri, Jan 15, 2021 at 10:02:41PM +0100, Jakub Jelinek via Gcc-patches wrote:
> Like this?

Bootstrapped/regtested successfully on x86_64-linux and i686-linux.

> 2021-01-15  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/96271
> 	* passes.def: Pass false argument to first two pass_cd_dce
> 	instances and true to last instance.  Add comment that
> 	last instance rewrites no longer addressed locals.
> 	(pass_cd_dce): Add update_address_taken_p member and initialize it.
> 	(pass_cd_dce::set_pass_param): New method.
> 	(pass_cd_dce::execute): Return TODO_update_address_taken from
> 	last cd_dce instance.
> 
> 	* gcc.target/i386/pr96271.c: New test.

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271]
  2021-01-16  1:00           ` Jakub Jelinek
@ 2021-01-16  7:52             ` Richard Biener
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Biener @ 2021-01-16  7:52 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

On January 16, 2021 2:00:38 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Fri, Jan 15, 2021 at 10:02:41PM +0100, Jakub Jelinek via Gcc-patches
>wrote:
>> Like this?
>
>Bootstrapped/regtested successfully on x86_64-linux and i686-linux.

OK. 

Thanks, 
Richard. 

>> 2021-01-15  Jakub Jelinek  <jakub@redhat.com>
>> 
>> 	PR tree-optimization/96271
>> 	* passes.def: Pass false argument to first two pass_cd_dce
>> 	instances and true to last instance.  Add comment that
>> 	last instance rewrites no longer addressed locals.
>> 	(pass_cd_dce): Add update_address_taken_p member and initialize it.
>> 	(pass_cd_dce::set_pass_param): New method.
>> 	(pass_cd_dce::execute): Return TODO_update_address_taken from
>> 	last cd_dce instance.
>> 
>> 	* gcc.target/i386/pr96271.c: New test.
>
>	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-01-16  7:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-15 18:32 [PATCH] strlen: Return TODO_update_address_taken when memcmp has been optimized [PR96271] Jakub Jelinek
2021-01-15 19:48 ` Richard Biener
2021-01-15 19:57   ` Jakub Jelinek
2021-01-15 20:16     ` Richard Biener
2021-01-15 20:31       ` Jakub Jelinek
2021-01-15 21:02         ` Jakub Jelinek
2021-01-16  1:00           ` Jakub Jelinek
2021-01-16  7:52             ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).