public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR88440, enable mem* detection at -O[2s]
@ 2019-05-22  9:40 Richard Biener
  2019-05-23 11:32 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2019-05-22  9:40 UTC (permalink / raw)
  To: gcc-patches


This enables -ftree-loop-distribute-patterns at -O[2s] and also
arranges for cold loops to still be processed, but only for pattern
recognition, in order to save code size.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Martin has done extensive compile-time testing on SPEC,
identifying only a single regression, which I'll have a look into.

Richard.

2019-05-22  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/88440
	* opts.c (default_options_table): Enable -ftree-loop-distribute-patterns
	at -O[2s]+.
	* tree-loop-distribution.c (generate_memset_builtin): Fold the
	generated call.
	(generate_memcpy_builtin): Likewise.
	(distribute_loop): Pass in whether to only distribute patterns.
	(prepare_perfect_loop_nest): Also allow size optimization.
	(pass_loop_distribution::execute): When optimizing a loop
	nest for size allow pattern replacement.

	* gcc.dg/tree-ssa/ldist-37.c: New testcase.
	* gcc.dg/tree-ssa/ldist-38.c: Likewise.

Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 271463)
+++ gcc/opts.c	(working copy)
@@ -550,7 +550,7 @@ static const struct default_options defa
     { OPT_LEVELS_3_PLUS, OPT_fpredictive_commoning, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fsplit_paths, NULL, 1 },
-    { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribution, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
Index: gcc/tree-loop-distribution.c
===================================================================
--- gcc/tree-loop-distribution.c	(revision 271463)
+++ gcc/tree-loop-distribution.c	(working copy)
@@ -115,6 +115,7 @@ along with GCC; see the file COPYING3.
 #include "params.h"
 #include "tree-vectorizer.h"
 #include "tree-eh.h"
+#include "gimple-fold.h"
 
 
 #define MAX_DATAREFS_NUM \
@@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *lo
   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
   fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+  fold_stmt (&gsi);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *lo
   fn = build_fold_addr_expr (builtin_decl_implicit (kind));
   fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+  fold_stmt (&gsi);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop,
 
 static int
 distribute_loop (struct loop *loop, vec<gimple *> stmts,
-		 control_dependences *cd, int *nb_calls, bool *destroy_p)
+		 control_dependences *cd, int *nb_calls, bool *destroy_p,
+		 bool only_patterns_p)
 {
   ddrs_table = new hash_table<ddr_hasher> (389);
   struct graph *rdg;
@@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<
 
   /* If we are only distributing patterns but did not detect any,
      simply bail out.  */
-  if (!flag_tree_loop_distribution
+  if (only_patterns_p
       && !any_builtin)
     {
       nbp = 0;
@@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<
      a loop into pieces, separated by builtin calls.  That is, we
      only want no or a single loop body remaining.  */
   struct partition *into;
-  if (!flag_tree_loop_distribution)
+  if (only_patterns_p)
     {
       for (i = 0; partitions.iterate (i, &into); ++i)
 	if (!partition_builtin_p (into))
@@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *
 	 && loop_outer (outer)
 	 && outer->inner == loop && loop->next == NULL
 	 && single_exit (outer)
-	 && optimize_loop_for_speed_p (outer)
 	 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
 	 && (niters = number_of_latch_executions (outer)) != NULL_TREE
 	 && niters != chrec_dont_know)
@@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (functio
      walking to innermost loops.  */
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
     {
-      /* Don't distribute multiple exit edges loop, or cold loop.  */
+      /* Don't distribute multiple exit edges loop, or cold loop when
+         not doing pattern detection.  */
       if (!single_exit (loop)
-	  || !optimize_loop_for_speed_p (loop))
+	  || (!flag_tree_loop_distribute_patterns
+	      && !optimize_loop_for_speed_p (loop)))
 	continue;
 
       /* Don't distribute loop if niters is unknown.  */
@@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (functio
 
 	  bool destroy_p;
 	  int nb_generated_loops, nb_generated_calls;
-	  nb_generated_loops = distribute_loop (loop, work_list, cd,
-						&nb_generated_calls,
-						&destroy_p);
+	  nb_generated_loops
+	    = distribute_loop (loop, work_list, cd, &nb_generated_calls,
+			       &destroy_p, (!optimize_loop_for_speed_p (loop)
+					    || !flag_tree_loop_distribution));
 	  if (destroy_p)
 	    loops_to_be_destroyed.safe_push (loop);
 
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-37.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-37.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ldist-37.c	(working copy)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-ldist-optimized" } */
+
+void foo(char* restrict dst, const char* buf)
+{
+  for (int i=0; i<8; ++i)
+    *dst++ = *buf++;
+}
+
+/* { dg-final { scan-tree-dump "split to 0 loops and 1 library calls" "ldist" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-38.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-38.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ldist-38.c	(working copy)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ldist-optimized" } */
+
+void foo(char* restrict dst, const char* buf)
+{
+  for (int i=0; i<8; ++i)
+    *dst++ = *buf++;
+}
+
+/* { dg-final { scan-tree-dump "split to 0 loops and 1 library calls" "ldist" } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-05-27 13:39 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-22  9:40 [PATCH] Fix PR88440, enable mem* detection at -O[2s] Richard Biener
2019-05-23 11:32 ` Richard Biener
2019-05-27  7:11   ` Christophe Lyon
2019-05-27  7:38     ` Richard Biener
2019-05-27 13:51       ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).