public inbox for gcc-patches@gcc.gnu.org
* [PATCH] tree-optimization/110515 - wrong code with LIM + PRE
@ 2023-07-06  7:29 Richard Biener
From: Richard Biener @ 2023-07-06  7:29 UTC (permalink / raw)
  To: gcc-patches

In this PR we face the issue that LIM speculates a load when
hoisting it out of the loop (since it knows it cannot trap).
Unfortunately this exposes undefined behavior when the load
accesses memory with the wrong dynamic type.  This later
makes PRE use that representation instead of the original
one, which accesses the same memory location but with a
different dynamic type, leading to a wrong disambiguation of
the original access against another access and thus a
wrong-code transform.
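
To illustrate the shape of the problem, here is a reduced sketch
(names and structure invented for illustration; the actual
reproducer is the testcase in the patch below):

  typedef unsigned long u64;

  struct Rep { u64 *Slots; unsigned Capacity; };

  struct Map {
    bool Small;
    union {
      u64 Inline[2];   // active member while Small is true
      Rep O;           // active member once the map has grown
    } U;
  };

  u64 sum (Map *m, unsigned n)
  {
    u64 s = 0;
    for (unsigned i = 0; i < n; ++i)
      {
	// The load of m->U.O.Slots is only needed when !m->Small, but
	// it is loop invariant and cannot trap, so LIM may hoist it
	// before the loop - also on executions where m->U.Inline is
	// the active union member, i.e. where the hoisted load has
	// the wrong dynamic type.  PRE can then reuse that hoisted
	// representation for the access inside the loop.
	u64 *p = m->Small ? m->U.Inline : m->U.O.Slots;
	s += p[i % 2];
      }
    return s;
  }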

Fortunately there already is code in PRE dealing with a similar
situation for code hoisting, but it left a small gap: the access
was stripped of its alias set but not of its base alias set.
Closing that gap also fixes the wrong-code transform in this bug,
even if it doesn't address the underlying issue of LIM speculating
that load.

The upside is that this fix is trivially safe to backport and the
chances of code generation regressions are very low.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/110515
	* tree-ssa-pre.cc (compute_avail): Make code dealing
	with hoisting loads with different alias-sets more
	robust.

	* g++.dg/opt/pr110515.C: New testcase.
---
 gcc/testsuite/g++.dg/opt/pr110515.C | 223 ++++++++++++++++++++++++++++
 gcc/tree-ssa-pre.cc                 |   1 +
 2 files changed, 224 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr110515.C

diff --git a/gcc/testsuite/g++.dg/opt/pr110515.C b/gcc/testsuite/g++.dg/opt/pr110515.C
new file mode 100644
index 00000000000..7a75cea3b4b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr110515.C
@@ -0,0 +1,223 @@
+// { dg-do run }
+// { dg-require-effective-target c++11 }
+// { dg-options "-O2" }
+
+typedef __UINT64_TYPE__ u64;
+
+struct SmallDenseMap {
+  static constexpr u64 EmptyKey = 0xC0FFEUL;
+  struct V { u64 v; };
+
+  bool contains(u64 Val) {
+    V *TheSlot = nullptr;
+    return (LookupSlotFor(Val, TheSlot) ? 1 : 0);
+  }
+
+  void try_emplace(u64 Key) {
+    V *TheSlot = nullptr;
+    if (LookupSlotFor(Key, TheSlot))
+      return;
+
+    // Otherwise, insert the new element.
+    InsertIntoSlot(TheSlot, Key);
+  }
+
+  void moveFromOldSlots(V *OldSlotsBegin, V *OldSlotsEnd) {
+    Size = 0;
+
+    V *B_ = u.o.Slots;
+    V *E_ = B_ + u.o.Capacity;
+    for (; B_ != E_; ++B_)
+      B_->v = EmptyKey;
+
+    // Insert all the old elements.
+    V *O = OldSlotsBegin;
+    V *E = OldSlotsEnd;
+    for (; O != E; ++O) {
+      if (O->v != EmptyKey) {
+        // Insert the key/value into the new table.
+        V * N = nullptr;
+        LookupSlotFor(O->v, N);
+        N->v = O->v;
+        Size++;
+      }
+    }
+  }
+
+  void InsertIntoSlot(V *TheSlot, u64 Key) {
+    unsigned NewSize = Size + 1;
+    unsigned Capacity = getCapacity();
+    // Make sure we always keep at least one Empty value
+    if (NewSize >= Capacity) {
+      //fprintf(stderr, "GROW: size=%u capacity=%u -> ...\n", Size, Capacity);
+      grow();
+      LookupSlotFor(Key, TheSlot);
+      Capacity = getCapacity();
+      //fprintf(stderr, "GROW: ... -> size=%u capacity=%u\n", NewSize, Capacity);
+    }
+
+    Size++;
+
+    TheSlot->v = Key;
+  }
+
+  bool LookupSlotFor(u64 Val,
+                       V *&FoundSlot) {
+    V *SlotsPtr = getSlots();
+    const unsigned Capacity = getCapacity();
+
+    for (unsigned i = 0; i < Capacity; ++i) {
+      V *ThisSlot = SlotsPtr + i;
+      if (Val == ThisSlot->v) {
+        FoundSlot = ThisSlot;
+        return true;
+      }
+
+      if (ThisSlot->v == EmptyKey) {
+        FoundSlot = ThisSlot;
+        return false;
+      }
+    }
+    // Guarantee that within the array there is a match
+    // or an Empty value where a new value can be inserted.
+    __builtin_trap();
+  }
+
+  // Needs to be at least 1 to hold one empty value
+  static constexpr unsigned InlineSlots = 2;
+
+  bool Small;
+  unsigned Size;
+
+  struct LargeRep {
+    V *Slots;
+    unsigned Capacity;
+  };
+
+  union {
+      V i[InlineSlots]; // Small = true
+      LargeRep o;       // Small = false
+  } u;
+
+  explicit SmallDenseMap() : Small(true), Size(0) {
+    Size = 0;
+
+    V *B = u.i;
+    V *E = B + InlineSlots;
+    for (; B != E; ++B)
+      B->v = EmptyKey;
+  }
+
+  void grow() {
+    // assert:
+    if (!Small) __builtin_trap();
+
+    // First move the inline Slots into a temporary storage.
+    V TmpStorage[InlineSlots];
+    V *TmpBegin = TmpStorage;
+    V *TmpEnd = TmpBegin;
+
+    // Loop over the Slots, moving non-empty, non-tombstones into the
+    // temporary storage. Have the loop move the TmpEnd forward as it goes.
+    V *P = u.i;
+    V *E = P + InlineSlots;
+    for (; P != E; ++P) {
+        if (P->v != EmptyKey) {
+            TmpEnd->v = P->v;
+            ++TmpEnd;
+        }
+    }
+
+    Small = false;
+    u.o = LargeRep{new V[128], 128};
+    moveFromOldSlots(TmpBegin, TmpEnd);
+  }
+
+  V *getSlots() {
+    if (Small) {
+      V * inl = u.i;
+      return inl;
+    }
+    else {
+      LargeRep * rep = &u.o;
+      return rep->Slots;
+    }
+  }
+
+  unsigned getCapacity() {
+    if (Small) {
+      return InlineSlots;
+    }
+    else {
+      LargeRep * rep = &u.o;
+      return rep->Capacity;
+    }
+  }
+};
+
+#pragma GCC optimize(0)
+
+struct P {
+    u64 f;
+    bool s;
+};
+
+static u64 ws = 0;
+static P WorkList[128];
+
+__attribute__((noipa))
+static void popupateIni() {
+  for (u64 Var : (u64[]){8,7,6,5,4,3,0}) {
+    WorkList[ws++] = P{Var, false};
+  }
+}
+
+__attribute__((noipa))
+static void checkCycle(u64 Var) {
+    // Detect cycles.
+    static bool seen[256];
+    if (Var >= 256 || seen[Var]) __builtin_trap();
+    seen[Var] = true;
+}
+
+
+__attribute__((noipa))
+static void populateDeps(u64 Var) {
+
+    WorkList[ws++] = P{Var, true};
+    if (Var == 8)
+        WorkList[ws++] = P{0, false};
+}
+
+
+__attribute__((noipa)) __attribute__((optimize(3)))
+static void bug() {
+
+  // triggers growth on insert
+  SmallDenseMap Visited;
+
+  popupateIni();
+
+  while (ws > 0) {
+    P Item = WorkList[--ws];
+    u64 Var = Item.f;
+    bool visitedAllDependencies = Item.s;
+
+    if (Visited.contains(Var)) {
+      continue;
+    }
+
+    if (visitedAllDependencies) {
+      Visited.try_emplace(Var);
+      continue;
+    }
+
+    checkCycle(Var);
+    populateDeps(Var);
+  }
+}
+
+__attribute__((noipa))
+int main() {
+    bug();
+}
diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
index 7bbfa5ac43d..e33c5ba80e2 100644
--- a/gcc/tree-ssa-pre.cc
+++ b/gcc/tree-ssa-pre.cc
@@ -4245,6 +4245,7 @@ compute_avail (function *fun)
 		      else
 			{
 			  ref->set = 0;
+			  ref->base_set = 0;
 			  if (ref1->opcode == MEM_REF)
 			    ref1->op0
 			      = wide_int_to_tree (ptr_type_node,
-- 
2.35.3
