From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <rguenth@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1666)
	id 2131E3858D32; Mon, 18 Sep 2023 08:56:45 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2131E3858D32
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1695027405;
	bh=xWW0SpiKvKRRCcE9+UNwS93K1d93ChGky0L+TPaYGeQ=;
	h=From:To:Subject:Date:From;
	b=td0AcIgUuNuIHiAjq97vkbo6Sn0nRu/P5wNDK9ovDfk8QntYFJZGpa7hhNJFvFTQr
	 Sq3O1Q5Y3vf90AfLiYRVBxQgsEM8PduXelNO7gP3zJiJ+B3cCIqAoPylTXhgObcBd/
	 u41zNrMsxDcymKHOa378akNL3rM5WFUF5ljtU2k8=
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Richard Biener <rguenth@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r14-4089] tree-optimization/111294 - backwards threader PHI
 costing
X-Act-Checkin: gcc
X-Git-Author: Richard Biener <rguenther@suse.de>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 1f9bf6f372da48c75d42f2669ba92f3fd4370fda
X-Git-Newrev: d45ddc2c04e471d0dcee016b6edacc00b8341b16
Message-Id: <20230918085645.2131E3858D32@sourceware.org>
Date: Mon, 18 Sep 2023 08:56:45 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:d45ddc2c04e471d0dcee016b6edacc00b8341b16

commit r14-4089-gd45ddc2c04e471d0dcee016b6edacc00b8341b16
Author: Richard Biener <rguenther@suse.de>
Date:   Thu Sep 14 13:06:51 2023 +0200

    tree-optimization/111294 - backwards threader PHI costing
    
    This revives an earlier patch since the problematic code applying
    extra costs to PHIs in copied blocks we couldn't make any sense of
    prevents a required threading in this case.  Instead of coming up
    with an artificial other costing the following simply removes the
    bits.
    
    As with all threading changes this requires a plethora of testsuite
    adjustments, but only the last three are unfortunate as is the
    libgomp team.c adjustment which is required to avoid a bogus -Werror
    diagnostic during bootstrap.
    
            PR tree-optimization/111294
    gcc/
            * tree-ssa-threadbackward.cc (back_threader_profitability::m_name):
            Remove
            (back_threader::find_paths_to_names): Adjust.
            (back_threader::maybe_thread_block): Likewise.
            (back_threader_profitability::possibly_profitable_path_p): Remove
            code applying extra costs to copies PHIs.
    
    libgomp/
            * team.c (gomp_team_start): Assert alloca size to avoid false
            positive alloc-size diagnostic.
    
    gcc/testsuite/
            * gcc.dg/tree-ssa/pr111294.c: New test.
            * gcc.dg/tree-ssa/phi_on_compare-4.c: Adjust.
            * gcc.dg/tree-ssa/pr59597.c: Likewise.
            * gcc.dg/tree-ssa/pr61839_2.c: Likewise.
            * gcc.dg/tree-ssa/ssa-sink-18.c: Likewise.
            * g++.dg/warn/Wstringop-overflow-4.C: XFAIL subtest on ilp32.
            * gcc.dg/uninit-pred-9_b.c: XFAIL subtest everywhere.
            * gcc.dg/vect/vect-117.c: Make scan for not Invalid sum
            conditional on lp64.

Diff:
---
 gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr111294.c         | 32 +++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr59597.c          |  8 +---
 gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c        |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c      |  6 ++-
 gcc/testsuite/gcc.dg/uninit-pred-9_b.c           |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-117.c             |  2 +-
 gcc/tree-ssa-threadbackward.cc                   | 60 +++---------------------
 libgomp/team.c                                   |  2 +
 10 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
index faad5bed074..275ecac01b5 100644
--- a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
+++ b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
@@ -151,7 +151,9 @@ void test_strcpy_new_int16_t (size_t n, const size_t vals[])
        as size_t as a result of threading.  See PR 101688 comment #2.  */
     T (S (1), new int16_t[r_0_imax]);
 
-  T (S (2), new int16_t[r_0_imax + 1]);
+  /* Similar to PR 101688 the following can result in a bougs warning because
+     of threading.  */
+  T (S (2), new int16_t[r_0_imax + 1]); // { dg-bogus "into a region of size" "" { xfail { ilp32 } } }
   T (S (9), new int16_t[r_0_imax * 2 + 1]);
 
   int r_1_imax = SR (1, INT_MAX);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
index 1e09f89af9f..6240d1cdd6d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-Ofast -fdump-tree-dom2" } */
+/* { dg-options "-Ofast -fdump-tree-threadfull1-stats" } */
 
 void g (int);
 void g1 (int);
@@ -37,4 +37,4 @@ f (long a, long b, long c, long d, int x)
   g (c + d);
 }
 
-/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 2" "threadfull1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111294.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111294.c
new file mode 100644
index 00000000000..9ad912bad0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111294.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-optimized" } */
+
+void foo(void);
+static short a;
+static int b, c, d;
+static int *e, *f = &d;
+static int **g = &e;
+static unsigned char h;
+static short(i)(short j, int k) { return j > k ?: j; }
+static char l() {
+    if (a) return b;
+    return c;
+}
+int main() {
+    b = 0;
+    for (; b < 5; ++b)
+        ;
+    h = l();
+    if (a ^ 3 >= i(h, 11))
+        a = 0;
+    else {
+        *g = f;
+        if (e == &d & b) {
+            __builtin_unreachable();
+        } else
+            foo();
+        ;
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c b/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c
index 0f66aae87bb..26c81d9dbb7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-Ofast -fdisable-tree-cunrolli -fdump-tree-threadfull1-details" } */
+/* { dg-options "-Ofast -fdump-tree-ethread-details" } */
 
 typedef unsigned short u16;
 typedef unsigned char u8;
@@ -56,8 +56,4 @@ main (int argc, char argv[])
   return crc;
 }
 
-/* We used to have no threads in vrp-thread1 because all the attempted
-   ones would cross loops.  Now we get 30+ threads before VRP because
-   of loop unrolling.  A better option is to disable unrolling and
-   test for the original 4 threads that this test was testing.  */
-/* { dg-final { scan-tree-dump-times "Registering jump thread" 4 "threadfull1" } } */
+/* { dg-final { scan-tree-dump-times "Registering jump thread" 2 "ethread" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c
index 0e0f4c02113..a78e444038a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c
@@ -1,6 +1,8 @@
 /* PR tree-optimization/61839.  */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-evrp" } */
+/* Disable jump threading, we want to avoid separating the division/modulo
+   by zero paths - we'd isolate those only later.  */
+/* { dg-options "-O2 -fno-thread-jumps -fdump-tree-evrp" } */
 /* { dg-require-effective-target int32plus } */
 
 __attribute__ ((noinline))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
index fd6c8677212..13b9ba4f70f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
@@ -198,7 +198,9 @@ compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
     exits after gimple loop optimizations, which generates instructions executed
     each iteration in loop, but the results are used outside of loop:
     With -m64,
-    "Sinking _367 = (uint8_t *) _320;
+    "Sinking op_230 = op_244 + 2;
+    from bb 63 to bb 94
+    Sinking _367 = (uint8_t *) _320;
     from bb 31 to bb 90
     Sinking _320 = _321 + ivtmp.25_326;
     from bb 31 to bb 90
@@ -213,4 +215,4 @@ compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
     base+index addressing modes, so the ip[len] address computation can't be
     made from the IV computation above.  */
 
- /* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 xfail { riscv64-*-* } } } } */
+ /* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink2" { target lp64 xfail { riscv64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/uninit-pred-9_b.c b/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
index 0f508fa56e1..3c83d505ec0 100644
--- a/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
+++ b/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
@@ -17,7 +17,7 @@ int foo (int n, int l, int m, int r)
 
   if (l > 100)
     if ( (n <= 9) &&  (m < 100)  && (r < 19) )
-      blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail powerpc*-*-* cris-*-* riscv*-*-* } } */
+      blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail *-*-* } } */
 
   if ( (n <= 8) &&  (m < 99)  && (r < 19) )
       blah(v); /* { dg-bogus "uninitialized" "pr101674" { xfail mmix-*-* } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-117.c b/gcc/testsuite/gcc.dg/vect/vect-117.c
index 010d63c9ad8..4755e39f951 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-117.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-117.c
@@ -61,4 +61,4 @@ int main (void)
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 "vect" } } */
 
-/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { target { lp64 } } } } */
diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc
index d5da4b0c1b1..c45f4b261ad 100644
--- a/gcc/tree-ssa-threadbackward.cc
+++ b/gcc/tree-ssa-threadbackward.cc
@@ -62,7 +62,7 @@ class back_threader_profitability
 {
 public:
   back_threader_profitability (bool speed_p, gimple *stmt);
-  bool possibly_profitable_path_p (const vec<basic_block> &, tree, bool *);
+  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
   bool profitable_path_p (const vec<basic_block> &,
 			  edge taken, bool *irreducible_loop);
 private:
@@ -126,9 +126,6 @@ private:
   auto_bitmap m_imports;
   // The last statement in the path.
   gimple *m_last_stmt;
-  // This is a bit of a wart.  It's used to pass the LHS SSA name to
-  // the profitability engine.
-  tree m_name;
   // Marker to differentiate unreachable edges.
   static const edge UNREACHABLE_EDGE;
   // Set to TRUE if unknown SSA names along a path should be resolved
@@ -366,7 +363,7 @@ back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
   // on the way to the backedge could be worthwhile.
   bool large_non_fsm;
   if (m_path.length () > 1
-      && (!profit.possibly_profitable_path_p (m_path, m_name, &large_non_fsm)
+      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
 	  || (!large_non_fsm
 	      && maybe_register_path (profit))))
     ;
@@ -517,23 +514,19 @@ back_threader::maybe_thread_block (basic_block bb)
     return;
 
   enum gimple_code code = gimple_code (stmt);
-  tree name;
-  if (code == GIMPLE_SWITCH)
-    name = gimple_switch_index (as_a <gswitch *> (stmt));
-  else if (code == GIMPLE_COND)
-    name = gimple_cond_lhs (stmt);
-  else
+  if (code != GIMPLE_SWITCH
+      && code != GIMPLE_COND)
     return;
 
   m_last_stmt = stmt;
   m_visited_bbs.empty ();
   m_path.truncate (0);
-  m_name = name;
 
   // We compute imports of the path during discovery starting
   // just with names used in the conditional.
   bitmap_clear (m_imports);
   ssa_op_iter iter;
+  tree name;
   FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
     {
       if (!gimple_range_ssa_p (name))
@@ -588,15 +581,12 @@ back_threader::debug ()
    *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
    but would be OK if we extend the path to cover the loop backedge.
 
-   NAME is the SSA_NAME of the variable we found to have a constant
-   value on PATH.  If unknown, SSA_NAME is NULL.
-
    ?? It seems we should be able to loosen some of the restrictions in
    this function after loop optimizations have run.  */
 
 bool
 back_threader_profitability::possibly_profitable_path_p
-				  (const vec<basic_block> &m_path, tree name,
+				  (const vec<basic_block> &m_path,
 				   bool *large_non_fsm)
 {
   gcc_checking_assert (!m_path.is_empty ());
@@ -645,44 +635,6 @@ back_threader_profitability::possibly_profitable_path_p
       if (j < m_path.length () - 1)
 	{
 	  int orig_n_insns = m_n_insns;
-	  /* PHIs in the path will create degenerate PHIS in the
-	     copied path which will then get propagated away, so
-	     looking at just the duplicate path the PHIs would
-	     seem unimportant.
-
-	     But those PHIs, because they're assignments to objects
-	     typically with lives that exist outside the thread path,
-	     will tend to generate PHIs (or at least new PHI arguments)
-	     at points where we leave the thread path and rejoin
-	     the original blocks.  So we do want to account for them.
-
-	     We ignore virtual PHIs.  We also ignore cases where BB
-	     has a single incoming edge.  That's the most common
-	     degenerate PHI we'll see here.  Finally we ignore PHIs
-	     that are associated with the value we're tracking as
-	     that object likely dies.  */
-	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
-	    {
-	      for (gphi_iterator gsip = gsi_start_phis (bb);
-		   !gsi_end_p (gsip);
-		   gsi_next (&gsip))
-		{
-		  gphi *phi = gsip.phi ();
-		  tree dst = gimple_phi_result (phi);
-
-		  /* Note that if both NAME and DST are anonymous
-		     SSA_NAMEs, then we do not have enough information
-		     to consider them associated.  */
-		  if (dst != name
-		      && name
-		      && TREE_CODE (name) == SSA_NAME
-		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
-			  || !SSA_NAME_VAR (dst))
-		      && !virtual_operand_p (dst))
-		    ++m_n_insns;
-		}
-	    }
-
 	  if (!m_contains_hot_bb && m_speed_p)
 	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
 	  for (gsi = gsi_after_labels (bb);
diff --git a/libgomp/team.c b/libgomp/team.c
index 54dfca8080a..b4fd6f2704c 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -756,6 +756,8 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
       attr = &thread_attr;
     }
 
+  if (i >= nthreads)
+    __builtin_unreachable ();
   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
 			    * (nthreads - i));