public inbox for gcc-patches@gcc.gnu.org
* [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0
@ 2022-11-07  9:02 Richard Biener
  2022-11-07  9:08 ` Jakub Jelinek
  2022-11-07  9:48 ` Eric Botcazou
  0 siblings, 2 replies; 5+ messages in thread
From: Richard Biener @ 2022-11-07  9:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek

The following adds a fold_builtins pass at -O0, keying off some
unwanted optimization and setting pointer alignment of the result
of __builtin_assume_aligned before removing the call.  This
allows libatomic calls to be elided at -O0 on s390 for

__uint128_t foo(__uint128_t *p)
{
  return __atomic_load_n((__uint128_t *)__builtin_assume_aligned (p, 16), 0);
}

not sure how to reliably test this though.
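
Maybe something along these lines could serve as a testcase - completely
untested, and both the target selector and the scan-assembler pattern
below are guesses on my side:

/* { dg-do compile { target s390x-*-* } } */
/* { dg-options "-O0" } */

__uint128_t foo(__uint128_t *p)
{
  return __atomic_load_n((__uint128_t *)__builtin_assume_aligned (p, 16), 0);
}

/* { dg-final { scan-assembler-not "__atomic_load_16" } } */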

Thoughts?

Thanks,
Richard.

	PR tree-optimization/107389
	* tree-pass.h (pass_fold_builtins_O0): Declare.
	* passes.def (pass_fold_builtins_O0): Schedule.
	* tree-ssa-ccp.cc (pass_fold_builtins_O0): Define.
	(pass_fold_builtins): Template, add gate.
	(pass_fold_builtins::execute): Apply alignment to the
	target of __builtin_assume_aligned at -O0.  Disable most
	optimizations for the -O0 pass instance.
---
 gcc/passes.def      |  1 +
 gcc/tree-pass.h     |  1 +
 gcc/tree-ssa-ccp.cc | 52 +++++++++++++++++++++++++++++++++++++--------
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 193b5794749..4cc7c8acfcb 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -419,6 +419,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_lower_vaarg);
   NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_lower_complex_O0);
+  NEXT_PASS (pass_fold_builtins_O0);
   NEXT_PASS (pass_sancov_O0);
   NEXT_PASS (pass_lower_switch_O0);
   NEXT_PASS (pass_asan_O0);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 8480d41384b..43a4811128b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -441,6 +441,7 @@ extern gimple_opt_pass *make_pass_warn_printf (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_warn_recursion (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_fold_builtins_O0 (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_post_ipa_warn (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_stdarg (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 9778e776cf2..adb1e351e15 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -4197,6 +4197,7 @@ const pass_data pass_data_fold_builtins =
   TODO_update_ssa, /* todo_flags_finish */
 };
 
+template <bool O0>
 class pass_fold_builtins : public gimple_opt_pass
 {
 public:
@@ -4204,14 +4205,17 @@ public:
     : gimple_opt_pass (pass_data_fold_builtins, ctxt)
   {}
 
+  bool gate (function *) final override { return O0 == !optimize; }
+
   /* opt_pass methods: */
   opt_pass * clone () final override { return new pass_fold_builtins (m_ctxt); }
   unsigned int execute (function *) final override;
 
 }; // class pass_fold_builtins
 
+template <bool O0>
 unsigned int
-pass_fold_builtins::execute (function *fun)
+pass_fold_builtins<O0>::execute (function *fun)
 {
   bool cfg_changed = false;
   basic_block bb;
@@ -4245,7 +4249,7 @@ pass_fold_builtins::execute (function *fun)
 		      continue;
 		    }
 		}
-	      else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
+	      else if (!O0 && gimple_assign_load_p (stmt) && gimple_store_p (stmt))
 		optimize_memcpy (&i, gimple_assign_lhs (stmt),
 				 gimple_assign_rhs1 (stmt), NULL_TREE);
 	      gsi_next (&i);
@@ -4266,7 +4270,7 @@ pass_fold_builtins::execute (function *fun)
 	    }
 
 	  fcode = DECL_FUNCTION_CODE (callee);
-	  if (fold_stmt (&i))
+	  if (!O0 && fold_stmt (&i))
 	    ;
 	  else
 	    {
@@ -4281,7 +4285,29 @@ pass_fold_builtins::execute (function *fun)
 		  break;
 
 		case BUILT_IN_ASSUME_ALIGNED:
-		  /* Remove __builtin_assume_aligned.  */
+		  /* Remove __builtin_assume_aligned.  At -O0 preserve
+		     the alignment info in the result.  */
+		  if (O0)
+		    {
+		      tree align = gimple_call_arg (stmt, 1);
+		      tree misalign = (gimple_call_num_args (stmt) > 2
+				       ? gimple_call_arg (stmt, 2) : NULL_TREE);
+		      tree lhs = gimple_call_lhs (stmt);
+		      if (lhs
+			  && POINTER_TYPE_P (TREE_TYPE (lhs))
+			  && tree_fits_uhwi_p (align)
+			  && (!misalign || tree_fits_uhwi_p (misalign)))
+			{
+			  ptr_info_def *pi = get_ptr_info (lhs);
+			  unsigned aligni = TREE_INT_CST_LOW (align);
+			  unsigned misaligni
+			    = misalign ? TREE_INT_CST_LOW (misalign) : 0;
+			  if (aligni > 1
+			      && (aligni & (aligni - 1)) == 0
+			      && (misaligni & ~(aligni - 1)) == 0)
+			    set_ptr_info_alignment (pi, aligni, misaligni);
+			}
+		    }
 		  result = gimple_call_arg (stmt, 0);
 		  break;
 
@@ -4293,7 +4319,7 @@ pass_fold_builtins::execute (function *fun)
 		  continue;
 
 		case BUILT_IN_UNREACHABLE:
-		  if (optimize_unreachable (i))
+		  if (!O0 && optimize_unreachable (i))
 		    cfg_changed = true;
 		  break;
 
@@ -4454,7 +4480,8 @@ pass_fold_builtins::execute (function *fun)
 		  break;
 
 		case BUILT_IN_MEMCPY:
-		  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
+		  if (!O0
+		      && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
 		      && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
 		      && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR
 		      && TREE_CODE (gimple_call_arg (stmt, 2)) == INTEGER_CST)
@@ -4470,7 +4497,8 @@ pass_fold_builtins::execute (function *fun)
 		case BUILT_IN_VA_END:
 		case BUILT_IN_VA_COPY:
 		  /* These shouldn't be folded before pass_stdarg.  */
-		  result = optimize_stdarg_builtin (stmt);
+		  if (!O0)
+		    result = optimize_stdarg_builtin (stmt);
 		  break;
 
 		default:;
@@ -4534,7 +4562,13 @@ pass_fold_builtins::execute (function *fun)
 gimple_opt_pass *
 make_pass_fold_builtins (gcc::context *ctxt)
 {
-  return new pass_fold_builtins (ctxt);
+  return new pass_fold_builtins<false> (ctxt);
+}
+
+gimple_opt_pass *
+make_pass_fold_builtins_O0 (gcc::context *ctxt)
+{
+  return new pass_fold_builtins<true> (ctxt);
 }
 
 /* A simple pass that emits some warnings post IPA.  */
@@ -4566,7 +4600,7 @@ public:
   bool gate (function *) final override { return warn_nonnull != 0; }
   unsigned int execute (function *) final override;
 
-}; // class pass_fold_builtins
+}; // class pass_post_ipa_warn
 
 unsigned int
 pass_post_ipa_warn::execute (function *fun)
-- 
2.35.3

* Re: [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0
  2022-11-07  9:02 [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0 Richard Biener
@ 2022-11-07  9:08 ` Jakub Jelinek
  2022-11-07 10:31   ` Richard Biener
  2022-11-07  9:48 ` Eric Botcazou
  1 sibling, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2022-11-07  9:08 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Mon, Nov 07, 2022 at 10:02:11AM +0100, Richard Biener wrote:
> The following adds a fold_builtins pass at -O0, keying off some
> unwanted optimization and setting pointer alignment of the result
> of __builtin_assume_aligned before removing the call.  This
> allows libatomic calls to be elided at -O0 on s390 for
> 
> __uint128_t foo(__uint128_t *p)
> {
>   return __atomic_load_n((__uint128_t *)__builtin_assume_aligned (p, 16), 0);
> }
> 
> not sure how to reliably test this though.
> 
> Thoughts?

Do we really need a separate pass for it?
Can't we do it say during gimplification?

> 	PR tree-optimization/107389
> 	* tree-pass.h (pass_fold_builtins_O0): Declare.
> 	* passes.def (pass_fold_builtins_O0): Schedule.
> 	* tree-ssa-ccp.cc (pass_fold_builtins_O0): Define.
> 	(pass_fold_builtins): Template, add gate.
> 	(pass_fold_builtins::execute): Apply alignment to the
> 	target of __builtin_assume_aligned at -O0.  Disable most
> 	optimizations for the -O0 pass instance.

	Jakub


* Re: [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0
  2022-11-07  9:02 [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0 Richard Biener
  2022-11-07  9:08 ` Jakub Jelinek
@ 2022-11-07  9:48 ` Eric Botcazou
  1 sibling, 0 replies; 5+ messages in thread
From: Eric Botcazou @ 2022-11-07  9:48 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Jakub Jelinek

> index 9778e776cf2..adb1e351e15 100644
> --- a/gcc/tree-ssa-ccp.cc
> +++ b/gcc/tree-ssa-ccp.cc
> @@ -4197,6 +4197,7 @@ const pass_data pass_data_fold_builtins =
>    TODO_update_ssa, /* todo_flags_finish */
>  };
> 
> +template <bool O0>
>  class pass_fold_builtins : public gimple_opt_pass
>  {
>  public:
> 
> @@ -4204,14 +4205,17 @@ public:
>      : gimple_opt_pass (pass_data_fold_builtins, ctxt)
> 
>    {}
> 
> +  bool gate (function *) final override { return O0 == !optimize; }

O0 as suffix is fine, but please avoid using it as a standalone identifier.
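
Just giving the template parameter a more descriptive name would address
that, something along these lines (a sketch only, the name is arbitrary):

  template <bool in_O0_pipeline>  /* rather than plain O0 */
  class pass_fold_builtins : public gimple_opt_pass
  ...
    bool gate (function *) final override { return in_O0_pipeline == !optimize; }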

-- 
Eric Botcazou



* Re: [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0
  2022-11-07  9:08 ` Jakub Jelinek
@ 2022-11-07 10:31   ` Richard Biener
  2022-11-07 10:35     ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2022-11-07 10:31 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, rth

On Mon, 7 Nov 2022, Jakub Jelinek wrote:

> On Mon, Nov 07, 2022 at 10:02:11AM +0100, Richard Biener wrote:
> > The following adds a fold_builtins pass at -O0, keying off some
> > unwanted optimization and setting pointer alignment of the result
> > of __builtin_assume_aligned before removing the call.  This
> > allows libatomic calls to be elided at -O0 on s390 for
> > 
> > __uint128_t foo(__uint128_t *p)
> > {
> >   return __atomic_load_n((__uint128_t *)__builtin_assume_aligned (p, 16), 0);
> > }
> > 
> > not sure how to reliably test this though.
> > 
> > Thoughts?
> 
> Do we really need a separate pass for it?
> Can't we do it say during gimplification?

gimplification would be too early for always-inline functions - of course,
since we don't do any copy propagation at -O0, the source patterns this
works reliably for are limited, mostly to cases where the result is used
directly as an argument like in the example above.
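
For example (just an illustration, not something from the patch)

  void *q = __builtin_assume_aligned (p, 16);
  return __atomic_load_n ((__uint128_t *)q, 0);

would likely not be improved at -O0, since nothing propagates the
alignment recorded on the result of the builtin to the later use of q.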

So yes, the specific case in question would work when we elide
__builtin_assume_aligned during gimplification at -O0 (or during
the GIMPLE lower pass).

Would you prefer that?  Richard, would that work for you?

Thanks,
Richard.

> > 	PR tree-optimization/107389
> > 	* tree-pass.h (pass_fold_builtins_O0): Declare.
> > 	* passes.def (pass_fold_builtins_O0): Schedule.
> > 	* tree-ssa-ccp.cc (pass_fold_builtins_O0): Define.
> > 	(pass_fold_builtins): Template, add gate.
> > 	(pass_fold_builtins::execute): Apply alignment to the
> > 	target of __builtin_assume_aligned at -O0.  Disable most
> > 	optimizations for the -O0 pass instance.
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

* Re: [PATCH][RFC] tree-optimization/107389 - use __builtin_assume_aligned at -O0
  2022-11-07 10:31   ` Richard Biener
@ 2022-11-07 10:35     ` Jakub Jelinek
  0 siblings, 0 replies; 5+ messages in thread
From: Jakub Jelinek @ 2022-11-07 10:35 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, rth

On Mon, Nov 07, 2022 at 10:31:21AM +0000, Richard Biener wrote:
> On Mon, 7 Nov 2022, Jakub Jelinek wrote:
> 
> > On Mon, Nov 07, 2022 at 10:02:11AM +0100, Richard Biener wrote:
> > > The following adds a fold_builtins pass at -O0, keying off some
> > > unwanted optimization and setting pointer alignment of the result
> > > of __builtin_assume_alignment before removing the call.  This
> > > allows libatomic calls to be elided at -O0 on s390 for
> > > 
> > > __uint128_t foo(__uint128_t *p)
> > > {
> > >   return __atomic_load_n((__uint128_t *)__builtin_assume_aligned (p, 16), 0);
> > > }
> > > 
> > > not sure how to reliably test this though.
> > > 
> > > Thoughts?
> > 
> > Do we really need a separate pass for it?
> > Can't we do it say during gimplification?
> 
> gimplification would be too early for always inline - of course since
> we don't do any copy propagation the source pattern this works reliably
> are limited, mostly when used directly as arguments like in the example
> above.

Yeah, that was exactly my thinking: because we don't copy propagate at -O0,
it will only handle cases where there is exactly one SSA_NAME involved.
The advantage of doing it at gimplification time is that we don't need to
add an extra pass for -O0.

> So yes, the specific case in question would work when we elide
> __builtin_assume_aligned during gimplification at -O0 (or during
> the GIMPLE lower pass).
> 
> Would you prefer that?  Richard, would that work for you?

	Jakub

