* [PATCH v3] x86: Properly find the maximum stack slot alignment
@ 2023-07-24 20:35 H.J. Lu
2023-07-25 11:27 ` Richard Biener
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2023-07-24 20:35 UTC (permalink / raw)
To: gcc-patches
Don't assume that stack slots can only be accessed by stack or frame
registers. We first find all registers defined by stack or frame
registers. Then check memory accesses by such registers, including
stack and frame registers.
gcc/
PR target/109780
* config/i386/i386.cc (ix86_update_stack_alignment): New.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.
gcc/testsuite/
PR target/109780
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
---
gcc/config/i386/i386.cc | 128 +++++++++++++++++----
gcc/testsuite/g++.target/i386/pr109780-1.C | 72 ++++++++++++
gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++
gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++
4 files changed, 214 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index caca74d6dec..b71fd9401ef 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8084,6 +8084,65 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Update the maximum stack slot alignment from memory alignment in
+ PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference stack slot. Update the maximum stack slot
+ alignment. */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ if (MEM_P (*iter))
+ {
+ unsigned int alignment = MEM_ALIGN (*iter);
+ unsigned int *stack_alignment
+ = (unsigned int *) data;
+ if (alignment > *stack_alignment)
+ *stack_alignment = alignment;
+ break;
+ }
+}
+
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+
+ rtx src = SET_SRC (set);
+ if (MEM_P (src))
+ continue;
+
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ continue;
+
+ if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+ continue;
+
+ /* Add this register to stack_slot_access. */
+ add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
+ bitmap_set_bit (worklist, REGNO (dest));
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8102,10 +8161,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8117,27 +8172,58 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+ unsigned int reg;
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+ note_stores (insn, ix86_update_stack_alignment,
+ &stack_alignment);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
new file mode 100644
index 00000000000..7e3eabdec94
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
+
+template <typename _Tp> struct remove_reference {
+ using type = __remove_reference(_Tp);
+};
+template <typename T> struct MaybeStorageBase {
+ T val;
+ struct Union {
+ ~Union();
+ } mStorage;
+};
+template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
+ char mIsSome;
+};
+template <typename T, typename U = typename remove_reference<T>::type>
+constexpr MaybeStorage<U> Some(T &&);
+template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
+ return {aValue};
+}
+template <class> struct Span {
+ int operator[](long idx) {
+ int *__trans_tmp_4;
+ if (__builtin_expect(idx, 0))
+ *(int *)__null = false;
+ __trans_tmp_4 = storage_.data();
+ return __trans_tmp_4[idx];
+ }
+ struct {
+ int *data() { return data_; }
+ int *data_;
+ } storage_;
+};
+struct Variant {
+ template <typename RefT> Variant(RefT) {}
+};
+long from_i, from___trans_tmp_9;
+namespace js::intl {
+struct DecimalNumber {
+ Variant string_;
+ unsigned long significandStart_;
+ unsigned long significandEnd_;
+ bool zero_ = false;
+ bool negative_;
+ template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
+ template <typename CharT>
+ static MaybeStorage<DecimalNumber> from(Span<const CharT>);
+ void from();
+};
+} // namespace js::intl
+void js::intl::DecimalNumber::from() {
+ Span<const char16_t> __trans_tmp_3;
+ from(__trans_tmp_3);
+}
+template <typename CharT>
+MaybeStorage<js::intl::DecimalNumber>
+js::intl::DecimalNumber::from(Span<const CharT> chars) {
+ DecimalNumber number(chars);
+ if (auto ch = chars[from_i]) {
+ from_i++;
+ number.negative_ = ch == '-';
+ }
+ while (from___trans_tmp_9 && chars[from_i])
+ ;
+ if (chars[from_i])
+ while (chars[from_i - 1])
+ number.zero_ = true;
+ return Some(number);
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
new file mode 100644
index 00000000000..6b06947f2a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+ for (int i = 0; i < n; ++i)
+ perm[i] = i;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
new file mode 100644
index 00000000000..152da06c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+#define N 9
+
+void
+f (double x, double y, double *res)
+{
+ y = -y;
+ for (int i = 0; i < N; ++i)
+ {
+ double tmp = y;
+ y = x;
+ x = tmp;
+ res[i] = i;
+ }
+ res[N] = y * y;
+ res[N + 1] = x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
--
2.41.0
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2023-07-24 20:35 [PATCH v3] x86: Properly find the maximum stack slot alignment H.J. Lu
@ 2023-07-25 11:27 ` Richard Biener
0 siblings, 0 replies; 10+ messages in thread
From: Richard Biener @ 2023-07-25 11:27 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Mon, Jul 24, 2023 at 10:36 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers. We first find all registers defined by stack or frame
> registers. Then check memory accesses by such registers, including
> stack and frame registers.
Looks good to me from an algorithmic/DF perspective - I'm leaving it for
x86 maintainers to assess the functional bits.
Thanks,
Richard.
> gcc/
>
> PR target/109780
> * config/i386/i386.cc (ix86_update_stack_alignment): New.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> ---
> gcc/config/i386/i386.cc | 128 +++++++++++++++++----
> gcc/testsuite/g++.target/i386/pr109780-1.C | 72 ++++++++++++
> gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++
> gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++
> 4 files changed, 214 insertions(+), 21 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index caca74d6dec..b71fd9401ef 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -8084,6 +8084,65 @@ output_probe_stack_range (rtx reg, rtx end)
> return "";
> }
>
> +/* Update the maximum stack slot alignment from memory alignment in
> + PAT. */
> +
> +static void
> +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> +{
> + /* This insn may reference stack slot. Update the maximum stack slot
> + alignment. */
> + subrtx_iterator::array_type array;
> + FOR_EACH_SUBRTX (iter, array, pat, ALL)
> + if (MEM_P (*iter))
> + {
> + unsigned int alignment = MEM_ALIGN (*iter);
> + unsigned int *stack_alignment
> + = (unsigned int *) data;
> + if (alignment > *stack_alignment)
> + *stack_alignment = alignment;
> + break;
> + }
> +}
> +
> +/* Find all registers defined with REG. */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> + unsigned int reg, auto_bitmap &worklist)
> +{
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
> + if (!NONDEBUG_INSN_P (insn))
> + continue;
> +
> + rtx set = single_set (insn);
> + if (!set)
> + continue;
> +
> + rtx src = SET_SRC (set);
> + if (MEM_P (src))
> + continue;
> +
> + rtx dest = SET_DEST (set);
> + if (!REG_P (dest))
> + continue;
> +
> + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> + continue;
> +
> + /* Add this register to stack_slot_access. */
> + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> + bitmap_set_bit (worklist, REGNO (dest));
> + }
> +}
> +
> /* Set stack_frame_required to false if stack frame isn't required.
> Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
> slot used if stack frame is required and CHECK_STACK_SLOT is true. */
> @@ -8102,10 +8161,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> HARD_FRAME_POINTER_REGNUM);
>
> - /* The preferred stack alignment is the minimum stack alignment. */
> - if (stack_alignment > crtl->preferred_stack_boundary)
> - stack_alignment = crtl->preferred_stack_boundary;
> -
> bool require_stack_frame = false;
>
> FOR_EACH_BB_FN (bb, cfun)
> @@ -8117,27 +8172,58 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> set_up_by_prologue))
> {
> require_stack_frame = true;
> -
> - if (check_stack_slot)
> - {
> - /* Find the maximum stack alignment. */
> - subrtx_iterator::array_type array;
> - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> - if (MEM_P (*iter)
> - && (reg_mentioned_p (stack_pointer_rtx,
> - *iter)
> - || reg_mentioned_p (frame_pointer_rtx,
> - *iter)))
> - {
> - unsigned int alignment = MEM_ALIGN (*iter);
> - if (alignment > stack_alignment)
> - stack_alignment = alignment;
> - }
> - }
> + break;
> }
> }
>
> cfun->machine->stack_frame_required = require_stack_frame;
> +
> + /* Stop if we don't need to check stack slot. */
> + if (!check_stack_slot)
> + return;
> +
> + /* The preferred stack alignment is the minimum stack alignment. */
> + if (stack_alignment > crtl->preferred_stack_boundary)
> + stack_alignment = crtl->preferred_stack_boundary;
> +
> + HARD_REG_SET stack_slot_access;
> + CLEAR_HARD_REG_SET (stack_slot_access);
> +
> + /* Stack slot can be accessed by stack pointer, frame pointer or
> + registers defined by stack pointer or frame pointer. */
> + auto_bitmap worklist;
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + STACK_POINTER_REGNUM);
> + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> + if (frame_pointer_needed)
> + {
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + HARD_FRAME_POINTER_REGNUM);
> + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> + }
> + unsigned int reg;
> + do
> + {
> + reg = bitmap_clear_first_set_bit (worklist);
> + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> + }
> + while (!bitmap_empty_p (worklist));
> +
> + hard_reg_set_iterator hrsi;
> + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
> + if (!NONDEBUG_INSN_P (insn))
> + continue;
> + note_stores (insn, ix86_update_stack_alignment,
> + &stack_alignment);
> + }
> }
>
> /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
> new file mode 100644
> index 00000000000..7e3eabdec94
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
> @@ -0,0 +1,72 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target c++17 } */
> +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
> +
> +template <typename _Tp> struct remove_reference {
> + using type = __remove_reference(_Tp);
> +};
> +template <typename T> struct MaybeStorageBase {
> + T val;
> + struct Union {
> + ~Union();
> + } mStorage;
> +};
> +template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
> + char mIsSome;
> +};
> +template <typename T, typename U = typename remove_reference<T>::type>
> +constexpr MaybeStorage<U> Some(T &&);
> +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
> + return {aValue};
> +}
> +template <class> struct Span {
> + int operator[](long idx) {
> + int *__trans_tmp_4;
> + if (__builtin_expect(idx, 0))
> + *(int *)__null = false;
> + __trans_tmp_4 = storage_.data();
> + return __trans_tmp_4[idx];
> + }
> + struct {
> + int *data() { return data_; }
> + int *data_;
> + } storage_;
> +};
> +struct Variant {
> + template <typename RefT> Variant(RefT) {}
> +};
> +long from_i, from___trans_tmp_9;
> +namespace js::intl {
> +struct DecimalNumber {
> + Variant string_;
> + unsigned long significandStart_;
> + unsigned long significandEnd_;
> + bool zero_ = false;
> + bool negative_;
> + template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
> + template <typename CharT>
> + static MaybeStorage<DecimalNumber> from(Span<const CharT>);
> + void from();
> +};
> +} // namespace js::intl
> +void js::intl::DecimalNumber::from() {
> + Span<const char16_t> __trans_tmp_3;
> + from(__trans_tmp_3);
> +}
> +template <typename CharT>
> +MaybeStorage<js::intl::DecimalNumber>
> +js::intl::DecimalNumber::from(Span<const CharT> chars) {
> + DecimalNumber number(chars);
> + if (auto ch = chars[from_i]) {
> + from_i++;
> + number.negative_ = ch == '-';
> + }
> + while (from___trans_tmp_9 && chars[from_i])
> + ;
> + if (chars[from_i])
> + while (chars[from_i - 1])
> + number.zero_ = true;
> + return Some(number);
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> new file mode 100644
> index 00000000000..6b06947f2a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +char perm[64];
> +
> +void
> +__attribute__((noipa))
> +foo (int n)
> +{
> + for (int i = 0; i < n; ++i)
> + perm[i] = i;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> new file mode 100644
> index 00000000000..152da06c6ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +#define N 9
> +
> +void
> +f (double x, double y, double *res)
> +{
> + y = -y;
> + for (int i = 0; i < N; ++i)
> + {
> + double tmp = y;
> + y = x;
> + x = tmp;
> + res[i] = i;
> + }
> + res[N] = y * y;
> + res[N + 1] = x;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> --
> 2.41.0
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] x86: Properly find the maximum stack slot alignment
@ 2025-02-13 8:31 H.J. Lu
2025-02-13 9:17 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2025-02-13 8:31 UTC (permalink / raw)
To: GCC Patches, Uros Bizjak, Hongtao Liu
[-- Attachment #1: Type: text/plain, Size: 789 bytes --]
Don't assume that stack slots can only be accessed by stack or frame
registers. We first find all registers defined by stack or frame
registers. Then check memory accesses by such registers, including
stack and frame registers.
gcc/
PR target/109780
PR target/109093
* config/i386/i386.cc (ix86_update_stack_alignment): New.
(ix86_find_all_reg_use_1): Likewise.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.
gcc/testsuite/
PR target/109780
PR target/109093
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109093-1.c: Likewise.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
* gcc.target/i386/pr109780-3.c: Likewise.
--
H.J.
[-- Attachment #2: v2-0001-x86-Properly-find-the-maximum-stack-slot-alignmen.patch --]
[-- Type: text/x-patch, Size: 12680 bytes --]
From 820f939a024fc71e4e37b509a3aa0290e8c4e9df Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 14 Mar 2023 11:41:51 -0700
Subject: [PATCH v2] x86: Properly find the maximum stack slot alignment
Don't assume that stack slots can only be accessed by stack or frame
registers. We first find all registers defined by stack or frame
registers. Then check memory accesses by such registers, including
stack and frame registers.
gcc/
PR target/109780
PR target/109093
* config/i386/i386.cc (ix86_update_stack_alignment): New.
(ix86_find_all_reg_use_1): Likewise.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.
gcc/testsuite/
PR target/109780
PR target/109093
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109093-1.c: Likewise.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
* gcc.target/i386/pr109780-3.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
gcc/config/i386/i386.cc | 173 ++++++++++++++++++---
gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++
gcc/testsuite/gcc.target/i386/pr109093-1.c | 39 +++++
gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++
gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++
gcc/testsuite/gcc.target/i386/pr109780-3.c | 52 +++++++
6 files changed, 350 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3128973ba79..4d855d9541c 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8466,6 +8466,110 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Update the maximum stack slot alignment from memory alignment in
+ PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference stack slot. Update the maximum stack slot
+ alignment. */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ if (MEM_P (*iter))
+ {
+ unsigned int alignment = MEM_ALIGN (*iter);
+ unsigned int *stack_alignment
+ = (unsigned int *) data;
+ if (alignment > *stack_alignment)
+ *stack_alignment = alignment;
+ break;
+ }
+}
+
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx src = SET_SRC (set);
+ if (MEM_P (src))
+ return;
+
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ return;
+
+ if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+ return;
+
+ /* Add this register to stack_slot_access. */
+ add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
+ bitmap_set_bit (worklist, REGNO (dest));
+}
+
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ if (CALL_P (insn) || JUMP_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+ switch (GET_CODE (exp))
+ {
+ case ASM_OPERANDS:
+ case CLOBBER:
+ case PREFETCH:
+ case USE:
+ break;
+ case UNSPEC:
+ case UNSPEC_VOLATILE:
+ for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
+ {
+ rtx x = XVECEXP (exp, 0, j);
+ if (GET_CODE (x) == SET)
+ ix86_find_all_reg_use_1 (x, stack_slot_access,
+ worklist);
+ }
+ break;
+ case SET:
+ ix86_find_all_reg_use_1 (exp, stack_slot_access,
+ worklist);
+ break;
+ default:
+ debug_rtx (exp);
+ gcc_unreachable ();
+ break;
+ }
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8484,10 +8588,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8499,27 +8599,58 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+ unsigned int reg;
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+ note_stores (insn, ix86_update_stack_alignment,
+ &stack_alignment);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
new file mode 100644
index 00000000000..7e3eabdec94
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
+
+template <typename _Tp> struct remove_reference {
+ using type = __remove_reference(_Tp);
+};
+template <typename T> struct MaybeStorageBase {
+ T val;
+ struct Union {
+ ~Union();
+ } mStorage;
+};
+template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
+ char mIsSome;
+};
+template <typename T, typename U = typename remove_reference<T>::type>
+constexpr MaybeStorage<U> Some(T &&);
+template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
+ return {aValue};
+}
+template <class> struct Span {
+ int operator[](long idx) {
+ int *__trans_tmp_4;
+ if (__builtin_expect(idx, 0))
+ *(int *)__null = false;
+ __trans_tmp_4 = storage_.data();
+ return __trans_tmp_4[idx];
+ }
+ struct {
+ int *data() { return data_; }
+ int *data_;
+ } storage_;
+};
+struct Variant {
+ template <typename RefT> Variant(RefT) {}
+};
+long from_i, from___trans_tmp_9;
+namespace js::intl {
+struct DecimalNumber {
+ Variant string_;
+ unsigned long significandStart_;
+ unsigned long significandEnd_;
+ bool zero_ = false;
+ bool negative_;
+ template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
+ template <typename CharT>
+ static MaybeStorage<DecimalNumber> from(Span<const CharT>);
+ void from();
+};
+} // namespace js::intl
+void js::intl::DecimalNumber::from() {
+ Span<const char16_t> __trans_tmp_3;
+ from(__trans_tmp_3);
+}
+template <typename CharT>
+MaybeStorage<js::intl::DecimalNumber>
+js::intl::DecimalNumber::from(Span<const CharT> chars) {
+ DecimalNumber number(chars);
+ if (auto ch = chars[from_i]) {
+ from_i++;
+ number.negative_ = ch == '-';
+ }
+ while (from___trans_tmp_9 && chars[from_i])
+ ;
+ if (chars[from_i])
+ while (chars[from_i - 1])
+ number.zero_ = true;
+ return Some(number);
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..0459d1947f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */
+
+#include <cpuid.h>
+
+int a, b, c, d;
+char e, f = 1;
+short g, h, i;
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ short j;
+
+ for (; g >= 0; --g)
+ {
+ int *k[10];
+
+ for (d = 0; d < 10; d++)
+ k[d] = &b;
+
+ c = *k[1];
+
+ for (; a;)
+ j = i - c / f || (e ^= h);
+ }
+}
+
+int
+main (void)
+{
+ __builtin_cpu_init ();
+
+ if (__builtin_cpu_supports ("avx2"))
+ run ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
new file mode 100644
index 00000000000..6b06947f2a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+ for (int i = 0; i < n; ++i)
+ perm[i] = i;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
new file mode 100644
index 00000000000..152da06c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+#define N 9
+
+void
+f (double x, double y, double *res)
+{
+ y = -y;
+ for (int i = 0; i < N; ++i)
+ {
+ double tmp = y;
+ y = x;
+ x = tmp;
+ res[i] = i;
+ }
+ res[N] = y * y;
+ res[N + 1] = x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..f26fdd79af1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */
+
+#include <cpuid.h>
+
+char a;
+static int b, c, f;
+char *d = &a;
+static char *e = &a;
+
+__attribute__ ((weak))
+void
+g (int h, int i)
+{
+ int j = 1;
+ for (; c != -3; c = c - 1)
+ {
+ int k[10];
+ f = 0;
+ for (; f < 10; f++)
+ k[f] = 0;
+ *d = k[1];
+ if (i < *d)
+ {
+ *e = h;
+ for (; j < 9; j++)
+ {
+ b = 1;
+ for (; b < 7; b++)
+ ;
+ }
+ }
+ }
+}
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ g (1, 1);
+}
+
+int
+main (void)
+{
+ __builtin_cpu_init ();
+
+ if (__builtin_cpu_supports ("avx2"))
+ run ();
+
+ return 0;
+}
--
2.48.1
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v2] x86: Properly find the maximum stack slot alignment
2025-02-13 8:31 [PATCH v2] " H.J. Lu
@ 2025-02-13 9:17 ` Uros Bizjak
2025-02-14 3:56 ` [PATCH v3] " H.J. Lu
0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2025-02-13 9:17 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches, Hongtao Liu, Richard Biener, Jakub Jelinek
On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers. We first find all registers defined by stack or frame
> registers. Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
> PR target/109780
> PR target/109093
> * config/i386/i386.cc (ix86_update_stack_alignment): New.
> (ix86_find_all_reg_use_1): Likewise.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> PR target/109093
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109093-1.c: Likewise.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> * gcc.target/i386/pr109780-3.c: Likewise.
Some non-algorithmical changes below, otherwise LGTM. Please also get
someone to review dataflow infrastructure usage, I am not well versed
with it.
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx src = SET_SRC (set);
+ if (MEM_P (src))
Also reject assignment from CONST_SCALAR_INT?
+ return;
+
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ return;
Can we switch these two so the test for REG_P (dest) will be first? We
are not interested in anything that doesn't assign to a register.
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
+ if (CALL_P (insn) || JUMP_P (insn))
+ continue;
And here remains only NONJUMP_INSN_P (X), so both above conditions
could be substituted with:
if (!NONJUMP_INSN_P (X))
continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+ switch (GET_CODE (exp))
+ {
+ case ASM_OPERANDS:
+ case CLOBBER:
+ case PREFETCH:
+ case USE:
+ break;
+ case UNSPEC:
+ case UNSPEC_VOLATILE:
+ for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
+ {
+ rtx x = XVECEXP (exp, 0, j);
+ if (GET_CODE (x) == SET)
+ ix86_find_all_reg_use_1 (x, stack_slot_access,
+ worklist);
+ }
+ break;
+ case SET:
+ ix86_find_all_reg_use_1 (exp, stack_slot_access,
+ worklist);
+ break;
+ default:
+ debug_rtx (exp);
Stray debug remaining?
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
Please put a line of vertical space here ...
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
... here ...
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
... here ...
+ unsigned int reg;
... here ...
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
... here ...
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
... and here.
+ if (!NONDEBUG_INSN_P (insn))
!NONJUMP_INSN_P ?
+ continue;
Also some vertical space here.
+ note_stores (insn, ix86_update_stack_alignment,
+ &stack_alignment);
+ }
}
diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..0459d1947f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1
-ftrivial-auto-var-init=zero -fno-stack-protector" } */
+
Please use
/* { dg-do run { target avx2_runtime } } */
+{
+ __builtin_cpu_init ();
+
+ if (__builtin_cpu_supports ("avx2"))
+ run ();
+
And remove the above code from the test.
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..f26fdd79af1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
-fno-stack-clash-protection" } */
+
Also here.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 10+ messages in thread* [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-13 9:17 ` Uros Bizjak
@ 2025-02-14 3:56 ` H.J. Lu
2025-02-14 13:11 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2025-02-14 3:56 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches, Hongtao Liu, Richard Biener, Jakub Jelinek
[-- Attachment #1.1: Type: text/plain, Size: 6428 bytes --]
On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Don't assume that stack slots can only be accessed by stack or frame
> > registers. We first find all registers defined by stack or frame
> > registers. Then check memory accesses by such registers, including
> > stack and frame registers.
> >
> > gcc/
> >
> > PR target/109780
> > PR target/109093
> > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > (ix86_find_all_reg_use_1): Likewise.
> > (ix86_find_all_reg_use): Likewise.
> > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > from registers defined by stack or frame registers.
> >
> > gcc/testsuite/
> >
> > PR target/109780
> > PR target/109093
> > * g++.target/i386/pr109780-1.C: New test.
> > * gcc.target/i386/pr109093-1.c: Likewise.
> > * gcc.target/i386/pr109780-1.c: Likewise.
> > * gcc.target/i386/pr109780-2.c: Likewise.
> > * gcc.target/i386/pr109780-3.c: Likewise.
>
> Some non-algorithmical changes below, otherwise LGTM. Please also get
> someone to review dataflow infrastructure usage, I am not well versed
> with it.
>
> +/* Helper function for ix86_find_all_reg_use. */
> +
> +static void
> +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> + auto_bitmap &worklist)
> +{
> + rtx src = SET_SRC (set);
> + if (MEM_P (src))
>
> Also reject assignment from CONST_SCALAR_INT?
Done.
> + return;
> +
> + rtx dest = SET_DEST (set);
> + if (!REG_P (dest))
> + return;
>
> Can we switch these two so the test for REG_P (dest) will be first? We
> are not interested in anything that doesn't assign to a register.
Done.
> +/* Find all registers defined with REG. */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> + unsigned int reg, auto_bitmap &worklist)
> +{
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
> + if (!NONDEBUG_INSN_P (insn))
> + continue;
>
> Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
>
> + if (CALL_P (insn) || JUMP_P (insn))
> + continue;
>
> And here remains only NONJUMP_INSN_P (X), so both above conditions
> could be substituted with:
>
> if (!NONJUMP_INSN_P (X))
> continue;
Done.
> +
> + rtx set = single_set (insn);
> + if (set)
> + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> +
> + rtx pat = PATTERN (insn);
> + if (GET_CODE (pat) != PARALLEL)
> + continue;
> +
> + for (int i = 0; i < XVECLEN (pat, 0); i++)
> + {
> + rtx exp = XVECEXP (pat, 0, i);
> + switch (GET_CODE (exp))
> + {
> + case ASM_OPERANDS:
> + case CLOBBER:
> + case PREFETCH:
> + case USE:
> + break;
> + case UNSPEC:
> + case UNSPEC_VOLATILE:
> + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> + {
> + rtx x = XVECEXP (exp, 0, j);
> + if (GET_CODE (x) == SET)
> + ix86_find_all_reg_use_1 (x, stack_slot_access,
> + worklist);
> + }
> + break;
> + case SET:
> + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> + worklist);
> + break;
> + default:
> + debug_rtx (exp);
>
> Stray debug remaining?
Removed.
> + HARD_REG_SET stack_slot_access;
> + CLEAR_HARD_REG_SET (stack_slot_access);
> +
> + /* Stack slot can be accessed by stack pointer, frame pointer or
> + registers defined by stack pointer or frame pointer. */
> + auto_bitmap worklist;
>
> Please put a line of vertical space here ...
Done.
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + STACK_POINTER_REGNUM);
> + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
>
> ... here ...
Done.
> + if (frame_pointer_needed)
> + {
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + HARD_FRAME_POINTER_REGNUM);
> + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> + }
>
> ... here ...
>
Done.
> + unsigned int reg;
>
> ... here ...
Done.
> + do
> + {
> + reg = bitmap_clear_first_set_bit (worklist);
> + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> + }
> + while (!bitmap_empty_p (worklist));
> +
> + hard_reg_set_iterator hrsi;
>
> ... here ...
Done.
> + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
>
> ... and here.
>
Done.
> + if (!NONDEBUG_INSN_P (insn))
>
> !NONJUMP_INSN_P ?
Changed.
> + continue;
>
> Also some vertical space here.
Done.
> + note_stores (insn, ix86_update_stack_alignment,
> + &stack_alignment);
> + }
> }
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> new file mode 100644
> index 00000000000..0459d1947f9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx2 -mtune=znver1
> -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> +
>
> Please use
>
> /* { dg-do run { target avx2_runtime } } */
>
Done.
> +{
> + __builtin_cpu_init ();
> +
> + if (__builtin_cpu_supports ("avx2"))
> + run ();
> +
>
> And remove the above code from the test.
>
Done.
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> new file mode 100644
> index 00000000000..f26fdd79af1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> -fno-stack-clash-protection" } */
> +
>
> Also here.
>
Done.
Here is the v3 patch.
Thanks.
--
H.J.
[-- Attachment #2: v3-0001-x86-Properly-find-the-maximum-stack-slot-alignmen.patch --]
[-- Type: text/x-patch, Size: 12500 bytes --]
From 4ad849b60892ed5662ecd2667a6aeeaa69a9b6c5 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 14 Mar 2023 11:41:51 -0700
Subject: [PATCH v3] x86: Properly find the maximum stack slot alignment
Don't assume that stack slots can only be accessed by stack or frame
registers. We first find all registers defined by stack or frame
registers. Then check memory accesses by such registers, including
stack and frame registers.
gcc/
PR target/109780
PR target/109093
* config/i386/i386.cc (ix86_update_stack_alignment): New.
(ix86_find_all_reg_use_1): Likewise.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.
gcc/testsuite/
PR target/109780
PR target/109093
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109093-1.c: Likewise.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
* gcc.target/i386/pr109780-3.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
gcc/config/i386/i386.cc | 177 ++++++++++++++++++---
gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++
gcc/testsuite/gcc.target/i386/pr109093-1.c | 33 ++++
gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++
gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++
gcc/testsuite/gcc.target/i386/pr109780-3.c | 46 ++++++
6 files changed, 342 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3128973ba79..fafd4a511a3 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8466,6 +8466,107 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Update the maximum stack slot alignment from memory alignment in
+ PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference stack slot. Update the maximum stack slot
+ alignment. */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ if (MEM_P (*iter))
+ {
+ unsigned int alignment = MEM_ALIGN (*iter);
+ unsigned int *stack_alignment
+ = (unsigned int *) data;
+ if (alignment > *stack_alignment)
+ *stack_alignment = alignment;
+ break;
+ }
+}
+
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ return;
+
+ rtx src = SET_SRC (set);
+ if (MEM_P (src) || CONST_SCALAR_INT_P (src))
+ return;
+
+ if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+ return;
+
+ /* Add this register to stack_slot_access. */
+ add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
+ bitmap_set_bit (worklist, REGNO (dest));
+}
+
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+ switch (GET_CODE (exp))
+ {
+ case ASM_OPERANDS:
+ case CLOBBER:
+ case PREFETCH:
+ case USE:
+ break;
+ case UNSPEC:
+ case UNSPEC_VOLATILE:
+ for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
+ {
+ rtx x = XVECEXP (exp, 0, j);
+ if (GET_CODE (x) == SET)
+ ix86_find_all_reg_use_1 (x, stack_slot_access,
+ worklist);
+ }
+ break;
+ case SET:
+ ix86_find_all_reg_use_1 (exp, stack_slot_access,
+ worklist);
+ break;
+ default:
+ gcc_unreachable ();
+ break;
+ }
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8484,10 +8585,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8499,27 +8596,65 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int reg;
+
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ note_stores (insn, ix86_update_stack_alignment,
+ &stack_alignment);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
new file mode 100644
index 00000000000..7e3eabdec94
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
+
+template <typename _Tp> struct remove_reference {
+ using type = __remove_reference(_Tp);
+};
+template <typename T> struct MaybeStorageBase {
+ T val;
+ struct Union {
+ ~Union();
+ } mStorage;
+};
+template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
+ char mIsSome;
+};
+template <typename T, typename U = typename remove_reference<T>::type>
+constexpr MaybeStorage<U> Some(T &&);
+template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
+ return {aValue};
+}
+template <class> struct Span {
+ int operator[](long idx) {
+ int *__trans_tmp_4;
+ if (__builtin_expect(idx, 0))
+ *(int *)__null = false;
+ __trans_tmp_4 = storage_.data();
+ return __trans_tmp_4[idx];
+ }
+ struct {
+ int *data() { return data_; }
+ int *data_;
+ } storage_;
+};
+struct Variant {
+ template <typename RefT> Variant(RefT) {}
+};
+long from_i, from___trans_tmp_9;
+namespace js::intl {
+struct DecimalNumber {
+ Variant string_;
+ unsigned long significandStart_;
+ unsigned long significandEnd_;
+ bool zero_ = false;
+ bool negative_;
+ template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
+ template <typename CharT>
+ static MaybeStorage<DecimalNumber> from(Span<const CharT>);
+ void from();
+};
+} // namespace js::intl
+void js::intl::DecimalNumber::from() {
+ Span<const char16_t> __trans_tmp_3;
+ from(__trans_tmp_3);
+}
+template <typename CharT>
+MaybeStorage<js::intl::DecimalNumber>
+js::intl::DecimalNumber::from(Span<const CharT> chars) {
+ DecimalNumber number(chars);
+ if (auto ch = chars[from_i]) {
+ from_i++;
+ number.negative_ = ch == '-';
+ }
+ while (from___trans_tmp_9 && chars[from_i])
+ ;
+ if (chars[from_i])
+ while (chars[from_i - 1])
+ number.zero_ = true;
+ return Some(number);
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..58a7b006c8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,33 @@
+/* { dg-do run { target avx2_runtime } } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */
+
+int a, b, c, d;
+char e, f = 1;
+short g, h, i;
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ short j;
+
+ for (; g >= 0; --g)
+ {
+ int *k[10];
+
+ for (d = 0; d < 10; d++)
+ k[d] = &b;
+
+ c = *k[1];
+
+ for (; a;)
+ j = i - c / f || (e ^= h);
+ }
+}
+
+int
+main (void)
+{
+ run ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
new file mode 100644
index 00000000000..6b06947f2a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+ for (int i = 0; i < n; ++i)
+ perm[i] = i;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
new file mode 100644
index 00000000000..152da06c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+#define N 9
+
+void
+f (double x, double y, double *res)
+{
+ y = -y;
+ for (int i = 0; i < N; ++i)
+ {
+ double tmp = y;
+ y = x;
+ x = tmp;
+ res[i] = i;
+ }
+ res[N] = y * y;
+ res[N + 1] = x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..a3a770a80e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target avx2_runtime } } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */
+
+char a;
+static int b, c, f;
+char *d = &a;
+static char *e = &a;
+
+__attribute__ ((weak))
+void
+g (int h, int i)
+{
+ int j = 1;
+ for (; c != -3; c = c - 1)
+ {
+ int k[10];
+ f = 0;
+ for (; f < 10; f++)
+ k[f] = 0;
+ *d = k[1];
+ if (i < *d)
+ {
+ *e = h;
+ for (; j < 9; j++)
+ {
+ b = 1;
+ for (; b < 7; b++)
+ ;
+ }
+ }
+ }
+}
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ g (1, 1);
+}
+
+int
+main (void)
+{
+ run ();
+ return 0;
+}
--
2.48.1
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-14 3:56 ` [PATCH v3] " H.J. Lu
@ 2025-02-14 13:11 ` Uros Bizjak
2025-02-14 14:08 ` Richard Biener
2025-02-17 10:45 ` Uros Bizjak
0 siblings, 2 replies; 10+ messages in thread
From: Uros Bizjak @ 2025-02-14 13:11 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches, Hongtao Liu, Richard Biener, Jakub Jelinek
On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Don't assume that stack slots can only be accessed by stack or frame
> > > registers. We first find all registers defined by stack or frame
> > > registers. Then check memory accesses by such registers, including
> > > stack and frame registers.
> > >
> > > gcc/
> > >
> > > PR target/109780
> > > PR target/109093
> > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > (ix86_find_all_reg_use_1): Likewise.
> > > (ix86_find_all_reg_use): Likewise.
> > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > from registers defined by stack or frame registers.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/109780
> > > PR target/109093
> > > * g++.target/i386/pr109780-1.C: New test.
> > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > * gcc.target/i386/pr109780-3.c: Likewise.
> >
> > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > someone to review dataflow infrastructure usage, I am not well versed
> > with it.
> >
> > +/* Helper function for ix86_find_all_reg_use. */
> > +
> > +static void
> > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > + auto_bitmap &worklist)
> > +{
> > + rtx src = SET_SRC (set);
> > + if (MEM_P (src))
> >
> > Also reject assignment from CONST_SCALAR_INT?
>
> Done.
>
> > + return;
> > +
> > + rtx dest = SET_DEST (set);
> > + if (!REG_P (dest))
> > + return;
> >
> > Can we switch these two so the test for REG_P (dest) will be first? We
> > are not interested in anything that doesn't assign to a register.
>
> Done.
>
> > +/* Find all registers defined with REG. */
> > +
> > +static void
> > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > + unsigned int reg, auto_bitmap &worklist)
> > +{
> > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > + ref != NULL;
> > + ref = DF_REF_NEXT_REG (ref))
> > + {
> > + if (DF_REF_IS_ARTIFICIAL (ref))
> > + continue;
> > +
> > + rtx_insn *insn = DF_REF_INSN (ref);
> > + if (!NONDEBUG_INSN_P (insn))
> > + continue;
> >
> > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> >
> > + if (CALL_P (insn) || JUMP_P (insn))
> > + continue;
> >
> > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > could be substituted with:
> >
> > if (!NONJUMP_INSN_P (X))
> > continue;
>
> Done.
>
> > +
> > + rtx set = single_set (insn);
> > + if (set)
> > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > +
> > + rtx pat = PATTERN (insn);
> > + if (GET_CODE (pat) != PARALLEL)
> > + continue;
> > +
> > + for (int i = 0; i < XVECLEN (pat, 0); i++)
> > + {
> > + rtx exp = XVECEXP (pat, 0, i);
> > + switch (GET_CODE (exp))
> > + {
> > + case ASM_OPERANDS:
> > + case CLOBBER:
> > + case PREFETCH:
> > + case USE:
> > + break;
> > + case UNSPEC:
> > + case UNSPEC_VOLATILE:
> > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > + {
> > + rtx x = XVECEXP (exp, 0, j);
> > + if (GET_CODE (x) == SET)
> > + ix86_find_all_reg_use_1 (x, stack_slot_access,
> > + worklist);
> > + }
> > + break;
> > + case SET:
> > + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > + worklist);
> > + break;
> > + default:
> > + debug_rtx (exp);
> >
> > Stray debug remaining?
>
> Removed.
>
> > + HARD_REG_SET stack_slot_access;
> > + CLEAR_HARD_REG_SET (stack_slot_access);
> > +
> > + /* Stack slot can be accessed by stack pointer, frame pointer or
> > + registers defined by stack pointer or frame pointer. */
> > + auto_bitmap worklist;
> >
> > Please put a line of vertical space here ...
>
> Done.
>
> > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > + STACK_POINTER_REGNUM);
> > + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> >
> > ... here ...
>
> Done.
>
> > + if (frame_pointer_needed)
> > + {
> > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > + HARD_FRAME_POINTER_REGNUM);
> > + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > + }
> >
> > ... here ...
> >
>
> Done.
>
> > + unsigned int reg;
> >
> > ... here ...
>
> Done.
>
> > + do
> > + {
> > + reg = bitmap_clear_first_set_bit (worklist);
> > + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > + }
> > + while (!bitmap_empty_p (worklist));
> > +
> > + hard_reg_set_iterator hrsi;
> >
> > ... here ...
>
> Done.
>
> > + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > + ref != NULL;
> > + ref = DF_REF_NEXT_REG (ref))
> > + {
> > + if (DF_REF_IS_ARTIFICIAL (ref))
> > + continue;
> > +
> > + rtx_insn *insn = DF_REF_INSN (ref);
> >
> > ... and here.
> >
>
> Done.
>
> > + if (!NONDEBUG_INSN_P (insn))
> >
> > !NONJUMP_INSN_P ?
>
> Changed.
>
> > + continue;
> >
> > Also some vertical space here.
>
> Done.
>
> > + note_stores (insn, ix86_update_stack_alignment,
> > + &stack_alignment);
> > + }
> > }
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > new file mode 100644
> > index 00000000000..0459d1947f9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > +
> >
> > Please use
> >
> > /* { dg-do run { target avx2_runtime } } */
> >
> Done.
>
> > +{
> > + __builtin_cpu_init ();
> > +
> > + if (__builtin_cpu_supports ("avx2"))
> > + run ();
> > +
> >
> > And remove the above code from the test.
> >
> Done.
>
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > new file mode 100644
> > index 00000000000..f26fdd79af1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > @@ -0,0 +1,52 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > -fno-stack-clash-protection" } */
> > +
> >
> > Also here.
> >
>
> Done.
>
> Here is the v3 patch.
LGTM, modulo DF part that would need another pair of eyes.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-14 13:11 ` Uros Bizjak
@ 2025-02-14 14:08 ` Richard Biener
2025-02-15 0:27 ` H.J. Lu
2025-02-17 10:45 ` Uros Bizjak
1 sibling, 1 reply; 10+ messages in thread
From: Richard Biener @ 2025-02-14 14:08 UTC (permalink / raw)
To: Uros Bizjak; +Cc: H.J. Lu, GCC Patches, Hongtao Liu, Jakub Jelinek
[-- Attachment #1: Type: text/plain, Size: 7490 bytes --]
On Fri, 14 Feb 2025, Uros Bizjak wrote:
> On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Don't assume that stack slots can only be accessed by stack or frame
> > > > registers. We first find all registers defined by stack or frame
> > > > registers. Then check memory accesses by such registers, including
> > > > stack and frame registers.
> > > >
> > > > gcc/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > > (ix86_find_all_reg_use_1): Likewise.
> > > > (ix86_find_all_reg_use): Likewise.
> > > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > > from registers defined by stack or frame registers.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * g++.target/i386/pr109780-1.C: New test.
> > > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > > * gcc.target/i386/pr109780-3.c: Likewise.
> > >
> > > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > > someone to review dataflow infrastructure usage, I am not well versed
> > > with it.
> > >
> > > +/* Helper function for ix86_find_all_reg_use. */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > > + auto_bitmap &worklist)
> > > +{
> > > + rtx src = SET_SRC (set);
> > > + if (MEM_P (src))
> > >
> > > Also reject assignment from CONST_SCALAR_INT?
> >
> > Done.
> >
> > > + return;
> > > +
> > > + rtx dest = SET_DEST (set);
> > > + if (!REG_P (dest))
> > > + return;
> > >
> > > Can we switch these two so the test for REG_P (dest) will be first? We
> > > are not interested in anything that doesn't assign to a register.
> >
> > Done.
> >
> > > +/* Find all registers defined with REG. */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > > + unsigned int reg, auto_bitmap &worklist)
> > > +{
> > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > + ref != NULL;
> > > + ref = DF_REF_NEXT_REG (ref))
> > > + {
> > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > + continue;
> > > +
> > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > + if (!NONDEBUG_INSN_P (insn))
> > > + continue;
> > >
> > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> > >
> > > + if (CALL_P (insn) || JUMP_P (insn))
> > > + continue;
> > >
> > > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > > could be substituted with:
> > >
> > > if (!NONJUMP_INSN_P (X))
> > > continue;
> >
> > Done.
> >
> > > +
> > > + rtx set = single_set (insn);
> > > + if (set)
> > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > > +
> > > + rtx pat = PATTERN (insn);
> > > + if (GET_CODE (pat) != PARALLEL)
> > > + continue;
> > > +
> > > + for (int i = 0; i < XVECLEN (pat, 0); i++)
> > > + {
> > > + rtx exp = XVECEXP (pat, 0, i);
> > > + switch (GET_CODE (exp))
> > > + {
> > > + case ASM_OPERANDS:
> > > + case CLOBBER:
> > > + case PREFETCH:
> > > + case USE:
> > > + break;
> > > + case UNSPEC:
> > > + case UNSPEC_VOLATILE:
> > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > > + {
> > > + rtx x = XVECEXP (exp, 0, j);
> > > + if (GET_CODE (x) == SET)
> > > + ix86_find_all_reg_use_1 (x, stack_slot_access,
> > > + worklist);
> > > + }
> > > + break;
> > > + case SET:
> > > + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > > + worklist);
> > > + break;
> > > + default:
> > > + debug_rtx (exp);
> > >
> > > Stray debug remaining?
> >
> > Removed.
> >
> > > + HARD_REG_SET stack_slot_access;
> > > + CLEAR_HARD_REG_SET (stack_slot_access);
> > > +
> > > + /* Stack slot can be accessed by stack pointer, frame pointer or
> > > + registers defined by stack pointer or frame pointer. */
> > > + auto_bitmap worklist;
> > >
> > > Please put a line of vertical space here ...
> >
> > Done.
> >
> > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > + STACK_POINTER_REGNUM);
> > > + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> > >
> > > ... here ...
> >
> > Done.
> >
> > > + if (frame_pointer_needed)
> > > + {
> > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > + HARD_FRAME_POINTER_REGNUM);
> > > + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > > + }
> > >
> > > ... here ...
> > >
> >
> > Done.
> >
> > > + unsigned int reg;
> > >
> > > ... here ...
> >
> > Done.
> >
> > > + do
> > > + {
> > > + reg = bitmap_clear_first_set_bit (worklist);
> > > + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > > + }
> > > + while (!bitmap_empty_p (worklist));
> > > +
> > > + hard_reg_set_iterator hrsi;
> > >
> > > ... here ...
> >
> > Done.
> >
> > > + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > + ref != NULL;
> > > + ref = DF_REF_NEXT_REG (ref))
> > > + {
> > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > + continue;
> > > +
> > > + rtx_insn *insn = DF_REF_INSN (ref);
> > >
> > > ... and here.
> > >
> >
> > Done.
> >
> > > + if (!NONDEBUG_INSN_P (insn))
> > >
> > > !NONJUMP_INSN_P ?
> >
> > Changed.
> >
> > > + continue;
> > >
> > > Also some vertical space here.
> >
> > Done.
> >
> > > + note_stores (insn, ix86_update_stack_alignment,
> > > + &stack_alignment);
> > > + }
> > > }
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > new file mode 100644
> > > index 00000000000..0459d1947f9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > > +
> > >
> > > Please use
> > >
> > > /* { dg-do run { target avx2_runtime } } */
> > >
> > Done.
> >
> > > +{
> > > + __builtin_cpu_init ();
> > > +
> > > + if (__builtin_cpu_supports ("avx2"))
> > > + run ();
> > > +
> > >
> > > And remove the above code from the test.
> > >
> > Done.
> >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > new file mode 100644
> > > index 00000000000..f26fdd79af1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > @@ -0,0 +1,52 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > > -fno-stack-clash-protection" } */
> > > +
> > >
> > > Also here.
> > >
> >
> > Done.
> >
> > Here is the v3 patch.
>
> LGTM, modulo DF part that would need another pair of eyes.
Looks good, it's not easy to follow the context this is invoked from
but I guess if it's not properly df_analyze()ed it would have exploded
during testing.
Richard.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-14 14:08 ` Richard Biener
@ 2025-02-15 0:27 ` H.J. Lu
2025-02-20 7:08 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2025-02-15 0:27 UTC (permalink / raw)
To: Richard Biener; +Cc: Uros Bizjak, GCC Patches, Hongtao Liu, Jakub Jelinek
On Fri, Feb 14, 2025 at 10:08 PM Richard Biener <rguenther@suse.de> wrote:
>
> On Fri, 14 Feb 2025, Uros Bizjak wrote:
>
> > On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Don't assume that stack slots can only be accessed by stack or frame
> > > > > registers. We first find all registers defined by stack or frame
> > > > > registers. Then check memory accesses by such registers, including
> > > > > stack and frame registers.
> > > > >
> > > > > gcc/
> > > > >
> > > > > PR target/109780
> > > > > PR target/109093
> > > > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > > > (ix86_find_all_reg_use_1): Likewise.
> > > > > (ix86_find_all_reg_use): Likewise.
> > > > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > > > from registers defined by stack or frame registers.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > > PR target/109780
> > > > > PR target/109093
> > > > > * g++.target/i386/pr109780-1.C: New test.
> > > > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > > > * gcc.target/i386/pr109780-3.c: Likewise.
> > > >
> > > > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > > > someone to review dataflow infrastructure usage, I am not well versed
> > > > with it.
> > > >
> > > > +/* Helper function for ix86_find_all_reg_use. */
> > > > +
> > > > +static void
> > > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > > > + auto_bitmap &worklist)
> > > > +{
> > > > + rtx src = SET_SRC (set);
> > > > + if (MEM_P (src))
> > > >
> > > > Also reject assignment from CONST_SCALAR_INT?
> > >
> > > Done.
> > >
> > > > + return;
> > > > +
> > > > + rtx dest = SET_DEST (set);
> > > > + if (!REG_P (dest))
> > > > + return;
> > > >
> > > > Can we switch these two so the test for REG_P (dest) will be first? We
> > > > are not interested in anything that doesn't assign to a register.
> > >
> > > Done.
> > >
> > > > +/* Find all registers defined with REG. */
> > > > +
> > > > +static void
> > > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > > > + unsigned int reg, auto_bitmap &worklist)
> > > > +{
> > > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > > + ref != NULL;
> > > > + ref = DF_REF_NEXT_REG (ref))
> > > > + {
> > > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > > + continue;
> > > > +
> > > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > > + if (!NONDEBUG_INSN_P (insn))
> > > > + continue;
> > > >
> > > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> > > >
> > > > + if (CALL_P (insn) || JUMP_P (insn))
> > > > + continue;
> > > >
> > > > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > > > could be substituted with:
> > > >
> > > > if (!NONJUMP_INSN_P (X))
> > > > continue;
> > >
> > > Done.
> > >
> > > > +
> > > > + rtx set = single_set (insn);
> > > > + if (set)
> > > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > > > +
> > > > + rtx pat = PATTERN (insn);
> > > > + if (GET_CODE (pat) != PARALLEL)
> > > > + continue;
> > > > +
> > > > + for (int i = 0; i < XVECLEN (pat, 0); i++)
> > > > + {
> > > > + rtx exp = XVECEXP (pat, 0, i);
> > > > + switch (GET_CODE (exp))
> > > > + {
> > > > + case ASM_OPERANDS:
> > > > + case CLOBBER:
> > > > + case PREFETCH:
> > > > + case USE:
> > > > + break;
> > > > + case UNSPEC:
> > > > + case UNSPEC_VOLATILE:
> > > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > > > + {
> > > > + rtx x = XVECEXP (exp, 0, j);
> > > > + if (GET_CODE (x) == SET)
> > > > + ix86_find_all_reg_use_1 (x, stack_slot_access,
> > > > + worklist);
> > > > + }
> > > > + break;
> > > > + case SET:
> > > > + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > > > + worklist);
> > > > + break;
> > > > + default:
> > > > + debug_rtx (exp);
> > > >
> > > > Stray debug remaining?
> > >
> > > Removed.
> > >
> > > > + HARD_REG_SET stack_slot_access;
> > > > + CLEAR_HARD_REG_SET (stack_slot_access);
> > > > +
> > > > + /* Stack slot can be accessed by stack pointer, frame pointer or
> > > > + registers defined by stack pointer or frame pointer. */
> > > > + auto_bitmap worklist;
> > > >
> > > > Please put a line of vertical space here ...
> > >
> > > Done.
> > >
> > > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > > + STACK_POINTER_REGNUM);
> > > > + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> > > >
> > > > ... here ...
> > >
> > > Done.
> > >
> > > > + if (frame_pointer_needed)
> > > > + {
> > > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > > + HARD_FRAME_POINTER_REGNUM);
> > > > + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > > > + }
> > > >
> > > > ... here ...
> > > >
> > >
> > > Done.
> > >
> > > > + unsigned int reg;
> > > >
> > > > ... here ...
> > >
> > > Done.
> > >
> > > > + do
> > > > + {
> > > > + reg = bitmap_clear_first_set_bit (worklist);
> > > > + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > > > + }
> > > > + while (!bitmap_empty_p (worklist));
> > > > +
> > > > + hard_reg_set_iterator hrsi;
> > > >
> > > > ... here ...
> > >
> > > Done.
> > >
> > > > + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > > + ref != NULL;
> > > > + ref = DF_REF_NEXT_REG (ref))
> > > > + {
> > > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > > + continue;
> > > > +
> > > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > >
> > > > ... and here.
> > > >
> > >
> > > Done.
> > >
> > > > + if (!NONDEBUG_INSN_P (insn))
> > > >
> > > > !NONJUMP_INSN_P ?
> > >
> > > Changed.
> > >
> > > > + continue;
> > > >
> > > > Also some vertical space here.
> > >
> > > Done.
> > >
> > > > + note_stores (insn, ix86_update_stack_alignment,
> > > > + &stack_alignment);
> > > > + }
> > > > }
> > > >
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > new file mode 100644
> > > > index 00000000000..0459d1947f9
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > @@ -0,0 +1,39 @@
> > > > +/* { dg-do run } */
> > > > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > > > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > > > +
> > > >
> > > > Please use
> > > >
> > > > /* { dg-do run { target avx2_runtime } } */
> > > >
> > > Done.
> > >
> > > > +{
> > > > + __builtin_cpu_init ();
> > > > +
> > > > + if (__builtin_cpu_supports ("avx2"))
> > > > + run ();
> > > > +
> > > >
> > > > And remove the above code from the test.
> > > >
> > > Done.
> > >
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > new file mode 100644
> > > > index 00000000000..f26fdd79af1
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > @@ -0,0 +1,52 @@
> > > > +/* { dg-do run } */
> > > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > > > -fno-stack-clash-protection" } */
> > > > +
> > > >
> > > > Also here.
> > > >
> > >
> > > Done.
> > >
> > > Here is the v3 patch.
> >
> > LGTM, modulo DF part that would need another pair of eyes.
>
> Looks good, it's not easy to follow the context this is invoked from
> but I guess if it's not properly df_analyze()ed it would have exploded
> during testing.
>
> Richard.
I will check it in the next few days.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-15 0:27 ` H.J. Lu
@ 2025-02-20 7:08 ` Uros Bizjak
0 siblings, 0 replies; 10+ messages in thread
From: Uros Bizjak @ 2025-02-20 7:08 UTC (permalink / raw)
To: H.J. Lu; +Cc: Richard Biener, GCC Patches, Hongtao Liu, Jakub Jelinek
On Sat, Feb 15, 2025 at 1:27 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Feb 14, 2025 at 10:08 PM Richard Biener <rguenther@suse.de> wrote:
> >
> > On Fri, 14 Feb 2025, Uros Bizjak wrote:
> >
> > > On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > >
> > > > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > Don't assume that stack slots can only be accessed by stack or frame
> > > > > > registers. We first find all registers defined by stack or frame
> > > > > > registers. Then check memory accesses by such registers, including
> > > > > > stack and frame registers.
> > > > > >
> > > > > > gcc/
> > > > > >
> > > > > > PR target/109780
> > > > > > PR target/109093
> > > > > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > > > > (ix86_find_all_reg_use_1): Likewise.
> > > > > > (ix86_find_all_reg_use): Likewise.
> > > > > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > > > > from registers defined by stack or frame registers.
> > > > > >
> > > > > > gcc/testsuite/
> > > > > >
> > > > > > PR target/109780
> > > > > > PR target/109093
> > > > > > * g++.target/i386/pr109780-1.C: New test.
> > > > > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > > > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > > > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > > > > * gcc.target/i386/pr109780-3.c: Likewise.
> > > > >
> > > > > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > > > > someone to review dataflow infrastructure usage, I am not well versed
> > > > > with it.
> > > > >
> > > > > +/* Helper function for ix86_find_all_reg_use. */
> > > > > +
> > > > > +static void
> > > > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > > > > + auto_bitmap &worklist)
> > > > > +{
> > > > > + rtx src = SET_SRC (set);
> > > > > + if (MEM_P (src))
> > > > >
> > > > > Also reject assignment from CONST_SCALAR_INT?
> > > >
> > > > Done.
> > > >
> > > > > + return;
> > > > > +
> > > > > + rtx dest = SET_DEST (set);
> > > > > + if (!REG_P (dest))
> > > > > + return;
> > > > >
> > > > > Can we switch these two so the test for REG_P (dest) will be first? We
> > > > > are not interested in anything that doesn't assign to a register.
> > > >
> > > > Done.
> > > >
> > > > > +/* Find all registers defined with REG. */
> > > > > +
> > > > > +static void
> > > > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > > > > + unsigned int reg, auto_bitmap &worklist)
> > > > > +{
> > > > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > > > + ref != NULL;
> > > > > + ref = DF_REF_NEXT_REG (ref))
> > > > > + {
> > > > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > > > + continue;
> > > > > +
> > > > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > > > + if (!NONDEBUG_INSN_P (insn))
> > > > > + continue;
> > > > >
> > > > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> > > > >
> > > > > + if (CALL_P (insn) || JUMP_P (insn))
> > > > > + continue;
> > > > >
> > > > > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > > > > could be substituted with:
> > > > >
> > > > > if (!NONJUMP_INSN_P (X))
> > > > > continue;
> > > >
> > > > Done.
> > > >
> > > > > +
> > > > > + rtx set = single_set (insn);
> > > > > + if (set)
> > > > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > > > > +
> > > > > + rtx pat = PATTERN (insn);
> > > > > + if (GET_CODE (pat) != PARALLEL)
> > > > > + continue;
> > > > > +
> > > > > + for (int i = 0; i < XVECLEN (pat, 0); i++)
> > > > > + {
> > > > > + rtx exp = XVECEXP (pat, 0, i);
> > > > > + switch (GET_CODE (exp))
> > > > > + {
> > > > > + case ASM_OPERANDS:
> > > > > + case CLOBBER:
> > > > > + case PREFETCH:
> > > > > + case USE:
> > > > > + break;
> > > > > + case UNSPEC:
> > > > > + case UNSPEC_VOLATILE:
> > > > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > > > > + {
> > > > > + rtx x = XVECEXP (exp, 0, j);
> > > > > + if (GET_CODE (x) == SET)
> > > > > + ix86_find_all_reg_use_1 (x, stack_slot_access,
> > > > > + worklist);
> > > > > + }
> > > > > + break;
> > > > > + case SET:
> > > > > + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > > > > + worklist);
> > > > > + break;
> > > > > + default:
> > > > > + debug_rtx (exp);
> > > > >
> > > > > Stray debug remaining?
> > > >
> > > > Removed.
> > > >
> > > > > + HARD_REG_SET stack_slot_access;
> > > > > + CLEAR_HARD_REG_SET (stack_slot_access);
> > > > > +
> > > > > + /* Stack slot can be accessed by stack pointer, frame pointer or
> > > > > + registers defined by stack pointer or frame pointer. */
> > > > > + auto_bitmap worklist;
> > > > >
> > > > > Please put a line of vertical space here ...
> > > >
> > > > Done.
> > > >
> > > > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > > > + STACK_POINTER_REGNUM);
> > > > > + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> > > > >
> > > > > ... here ...
> > > >
> > > > Done.
> > > >
> > > > > + if (frame_pointer_needed)
> > > > > + {
> > > > > + add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > > > + HARD_FRAME_POINTER_REGNUM);
> > > > > + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > > > > + }
> > > > >
> > > > > ... here ...
> > > > >
> > > >
> > > > Done.
> > > >
> > > > > + unsigned int reg;
> > > > >
> > > > > ... here ...
> > > >
> > > > Done.
> > > >
> > > > > + do
> > > > > + {
> > > > > + reg = bitmap_clear_first_set_bit (worklist);
> > > > > + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > > > > + }
> > > > > + while (!bitmap_empty_p (worklist));
> > > > > +
> > > > > + hard_reg_set_iterator hrsi;
> > > > >
> > > > > ... here ...
> > > >
> > > > Done.
> > > >
> > > > > + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > > > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > > > + ref != NULL;
> > > > > + ref = DF_REF_NEXT_REG (ref))
> > > > > + {
> > > > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > > > + continue;
> > > > > +
> > > > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > > >
> > > > > ... and here.
> > > > >
> > > >
> > > > Done.
> > > >
> > > > > + if (!NONDEBUG_INSN_P (insn))
> > > > >
> > > > > !NONJUMP_INSN_P ?
> > > >
> > > > Changed.
> > > >
> > > > > + continue;
> > > > >
> > > > > Also some vertical space here.
> > > >
> > > > Done.
> > > >
> > > > > + note_stores (insn, ix86_update_stack_alignment,
> > > > > + &stack_alignment);
> > > > > + }
> > > > > }
> > > > >
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > > new file mode 100644
> > > > > index 00000000000..0459d1947f9
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > > > @@ -0,0 +1,39 @@
> > > > > +/* { dg-do run } */
> > > > > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > > > > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > > > > +
> > > > >
> > > > > Please use
> > > > >
> > > > > /* { dg-do run { target avx2_runtime } } */
> > > > >
> > > > Done.
> > > >
> > > > > +{
> > > > > + __builtin_cpu_init ();
> > > > > +
> > > > > + if (__builtin_cpu_supports ("avx2"))
> > > > > + run ();
> > > > > +
> > > > >
> > > > > And remove the above code from the test.
> > > > >
> > > > Done.
> > > >
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > > new file mode 100644
> > > > > index 00000000000..f26fdd79af1
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > > > @@ -0,0 +1,52 @@
> > > > > +/* { dg-do run } */
> > > > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > > > > -fno-stack-clash-protection" } */
> > > > > +
> > > > >
> > > > > Also here.
> > > > >
> > > >
> > > > Done.
> > > >
> > > > Here is the v3 patch.
> > >
> > > LGTM, modulo DF part that would need another pair of eyes.
> >
> > Looks good, it's not easy to follow the context this is invoked from
> > but I guess if it's not properly df_analyze()ed it would have exploded
> > during testing.
> >
> > Richard.
>
> I will check it in the next few days.
This patch caused PR 118936.
As for stage4, the patch is more invasive and involved than I assumed,
so let's revert it for gcc-15 and retry for gcc-16.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
2025-02-14 13:11 ` Uros Bizjak
2025-02-14 14:08 ` Richard Biener
@ 2025-02-17 10:45 ` Uros Bizjak
1 sibling, 0 replies; 10+ messages in thread
From: Uros Bizjak @ 2025-02-17 10:45 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches, Hongtao Liu, Richard Biener, Jakub Jelinek
[-- Attachment #1: Type: text/plain, Size: 5095 bytes --]
On Fri, Feb 14, 2025 at 2:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Don't assume that stack slots can only be accessed by stack or frame
> > > > registers. We first find all registers defined by stack or frame
> > > > registers. Then check memory accesses by such registers, including
> > > > stack and frame registers.
> > > >
> > > > gcc/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > > (ix86_find_all_reg_use_1): Likewise.
> > > > (ix86_find_all_reg_use): Likewise.
> > > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > > from registers defined by stack or frame registers.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * g++.target/i386/pr109780-1.C: New test.
> > > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > > * gcc.target/i386/pr109780-3.c: Likewise.
> > >
> > > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > > someone to review dataflow infrastructure usage, I am not well versed
> > > with it.
> > >
> > > +/* Helper function for ix86_find_all_reg_use. */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > > + auto_bitmap &worklist)
> > > +{
> > > + rtx src = SET_SRC (set);
> > > + if (MEM_P (src))
> > >
> > > Also reject assignment from CONST_SCALAR_INT?
> >
> > Done.
> >
> > > + return;
> > > +
> > > + rtx dest = SET_DEST (set);
> > > + if (!REG_P (dest))
> > > + return;
> > >
> > > Can we switch these two so the test for REG_P (dest) will be first? We
> > > are not interested in anything that doesn't assign to a register.
> >
> > Done.
> >
> > > +/* Find all registers defined with REG. */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > > + unsigned int reg, auto_bitmap &worklist)
> > > +{
> > > + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > + ref != NULL;
> > > + ref = DF_REF_NEXT_REG (ref))
> > > + {
> > > + if (DF_REF_IS_ARTIFICIAL (ref))
> > > + continue;
> > > +
> > > + rtx_insn *insn = DF_REF_INSN (ref);
> > > + if (!NONDEBUG_INSN_P (insn))
> > > + continue;
> > >
> > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> > >
> > > + if (CALL_P (insn) || JUMP_P (insn))
> > > + continue;
> > >
> > > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > > could be substituted with:
> > >
> > > if (!NONJUMP_INSN_P (X))
> > > continue;
> >
> > Done.
> >
> > > +
> > > + rtx set = single_set (insn);
> > > + if (set)
> > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > > +
> > > + rtx pat = PATTERN (insn);
> > > + if (GET_CODE (pat) != PARALLEL)
> > > + continue;
> > > +
> > > + for (int i = 0; i < XVECLEN (pat, 0); i++)
> > > + {
> > > + rtx exp = XVECEXP (pat, 0, i);
> > > + switch (GET_CODE (exp))
> > > + {
> > > + case ASM_OPERANDS:
> > > + case CLOBBER:
> > > + case PREFETCH:
> > > + case USE:
> > > + break;
> > > + case UNSPEC:
> > > + case UNSPEC_VOLATILE:
> > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > > + {
> > > + rtx x = XVECEXP (exp, 0, j);
> > > + if (GET_CODE (x) == SET)
> > > + ix86_find_all_reg_use_1 (x, stack_slot_access,
> > > + worklist);
> > > + }
> > > + break;
> > > + case SET:
> > > + ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > > + worklist);
> > > + break;
> > > + default:
> > > + debug_rtx (exp);
> > >
> > > Stray debug remaining?
Some more looking in the above loop... A comment in rtlanal.cc says that:
--q--
/* All the other possibilities never store and can use a normal
rtx walk. This includes:
- USE
- TRAP_IF
- PREFETCH
- UNSPEC
- UNSPEC_VOLATILE. */
--/q--
So, based on the above, it is possible to considerably simplify the
loop above, all the way to:
--cut here--
for (int i = 0; i < XVECLEN (pat, 0); i++)
{
rtx exp = XVECEXP (pat, 0, i);
if (GET_CODE (exp) == SET)
ix86_find_all_reg_use_1 (exp, stack_slot_access, worklist);
}
--cut here--
We are only interested in SETs that set a register from a stack/frame
pointer, not in their general uses.
Attached patch implements this simplification.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 1020 bytes --]
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fafd4a511a3..560e6525b56 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8538,31 +8538,9 @@ ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
for (int i = 0; i < XVECLEN (pat, 0); i++)
{
rtx exp = XVECEXP (pat, 0, i);
- switch (GET_CODE (exp))
- {
- case ASM_OPERANDS:
- case CLOBBER:
- case PREFETCH:
- case USE:
- break;
- case UNSPEC:
- case UNSPEC_VOLATILE:
- for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
- {
- rtx x = XVECEXP (exp, 0, j);
- if (GET_CODE (x) == SET)
- ix86_find_all_reg_use_1 (x, stack_slot_access,
- worklist);
- }
- break;
- case SET:
- ix86_find_all_reg_use_1 (exp, stack_slot_access,
- worklist);
- break;
- default:
- gcc_unreachable ();
- break;
- }
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_use_1 (exp, stack_slot_access, worklist);
}
}
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] x86: Properly find the maximum stack slot alignment
@ 2025-04-20 21:24 H.J. Lu
2025-04-22 9:59 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2025-04-20 21:24 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak
Don't assume that stack slots can only be accessed by stack or frame
registers. We first find all registers defined by stack or frame
registers. Then check memory accesses by such registers, including
stack and frame registers.
gcc/
PR target/109780
PR target/109093
* config/i386/i386.cc (stack_access_data): New.
(ix86_update_stack_alignment): Likewise.
(ix86_find_all_reg_use_1): Likewise.
(ix86_find_all_reg_use): Likewise.
(ix86_find_max_used_stack_alignment): Also check memory accesses
from registers defined by stack or frame registers.
gcc/testsuite/
PR target/109780
PR target/109093
* g++.target/i386/pr109780-1.C: New test.
* gcc.target/i386/pr109093-1.c: Likewise.
* gcc.target/i386/pr109780-1.c: Likewise.
* gcc.target/i386/pr109780-2.c: Likewise.
* gcc.target/i386/pr109780-3.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
gcc/config/i386/i386.cc | 174 ++++++++++++++++++---
gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++
gcc/testsuite/gcc.target/i386/pr109093-1.c | 33 ++++
gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++
gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++
gcc/testsuite/gcc.target/i386/pr109780-3.c | 46 ++++++
6 files changed, 339 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 28603c2943e..9e4e76857e6 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8473,6 +8473,103 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in
+ PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference stack slot. Update the maximum stack slot
+ alignment if the memory is referenced by the stack access register.
+ */
+ stack_access_data *p = (stack_access_data *) data;
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ {
+ auto op = *iter;
+ if (GET_CODE (op) == ZERO_EXTEND)
+ op = XEXP (op, 0);
+ if (MEM_P (op) && reg_mentioned_p (p->reg, op))
+ {
+ unsigned int alignment = MEM_ALIGN (op);
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
+ break;
+ }
+ }
+}
+
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ return;
+
+ rtx src = SET_SRC (set);
+
+ if (GET_CODE (src) == ZERO_EXTEND)
+ src = XEXP (src, 0);
+
+ if (MEM_P (src) || CONST_SCALAR_INT_P (src))
+ return;
+
+ if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+ return;
+
+ /* Add this register to stack_slot_access. */
+ add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
+ bitmap_set_bit (worklist, REGNO (dest));
+}
+
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_use_1 (exp, stack_slot_access, worklist);
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8491,10 +8588,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8506,27 +8599,66 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int reg;
+
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ stack_access_data data = { nullptr, &stack_alignment };
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
new file mode 100644
index 00000000000..7e3eabdec94
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
+
+template <typename _Tp> struct remove_reference {
+ using type = __remove_reference(_Tp);
+};
+template <typename T> struct MaybeStorageBase {
+ T val;
+ struct Union {
+ ~Union();
+ } mStorage;
+};
+template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
+ char mIsSome;
+};
+template <typename T, typename U = typename remove_reference<T>::type>
+constexpr MaybeStorage<U> Some(T &&);
+template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
+ return {aValue};
+}
+template <class> struct Span {
+ int operator[](long idx) {
+ int *__trans_tmp_4;
+ if (__builtin_expect(idx, 0))
+ *(int *)__null = false;
+ __trans_tmp_4 = storage_.data();
+ return __trans_tmp_4[idx];
+ }
+ struct {
+ int *data() { return data_; }
+ int *data_;
+ } storage_;
+};
+struct Variant {
+ template <typename RefT> Variant(RefT) {}
+};
+long from_i, from___trans_tmp_9;
+namespace js::intl {
+struct DecimalNumber {
+ Variant string_;
+ unsigned long significandStart_;
+ unsigned long significandEnd_;
+ bool zero_ = false;
+ bool negative_;
+ template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
+ template <typename CharT>
+ static MaybeStorage<DecimalNumber> from(Span<const CharT>);
+ void from();
+};
+} // namespace js::intl
+void js::intl::DecimalNumber::from() {
+ Span<const char16_t> __trans_tmp_3;
+ from(__trans_tmp_3);
+}
+template <typename CharT>
+MaybeStorage<js::intl::DecimalNumber>
+js::intl::DecimalNumber::from(Span<const CharT> chars) {
+ DecimalNumber number(chars);
+ if (auto ch = chars[from_i]) {
+ from_i++;
+ number.negative_ = ch == '-';
+ }
+ while (from___trans_tmp_9 && chars[from_i])
+ ;
+ if (chars[from_i])
+ while (chars[from_i - 1])
+ number.zero_ = true;
+ return Some(number);
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..58a7b006c8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,33 @@
+/* { dg-do run { target avx2_runtime } } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */
+
+int a, b, c, d;
+char e, f = 1;
+short g, h, i;
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ short j;
+
+ for (; g >= 0; --g)
+ {
+ int *k[10];
+
+ for (d = 0; d < 10; d++)
+ k[d] = &b;
+
+ c = *k[1];
+
+ for (; a;)
+ j = i - c / f || (e ^= h);
+ }
+}
+
+int
+main (void)
+{
+ run ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
new file mode 100644
index 00000000000..6b06947f2a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+ for (int i = 0; i < n; ++i)
+ perm[i] = i;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
new file mode 100644
index 00000000000..152da06c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+#define N 9
+
+void
+f (double x, double y, double *res)
+{
+ y = -y;
+ for (int i = 0; i < N; ++i)
+ {
+ double tmp = y;
+ y = x;
+ x = tmp;
+ res[i] = i;
+ }
+ res[N] = y * y;
+ res[N + 1] = x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..a3a770a80e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target avx2_runtime } } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */
+
+char a;
+static int b, c, f;
+char *d = &a;
+static char *e = &a;
+
+__attribute__ ((weak))
+void
+g (int h, int i)
+{
+ int j = 1;
+ for (; c != -3; c = c - 1)
+ {
+ int k[10];
+ f = 0;
+ for (; f < 10; f++)
+ k[f] = 0;
+ *d = k[1];
+ if (i < *d)
+ {
+ *e = h;
+ for (; j < 9; j++)
+ {
+ b = 1;
+ for (; b < 7; b++)
+ ;
+ }
+ }
+ }
+}
+
+__attribute__ ((weak))
+void
+run (void)
+{
+ g (1, 1);
+}
+
+int
+main (void)
+{
+ run ();
+ return 0;
+}
--
2.49.0
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] x86: Properly find the maximum stack slot alignment
2025-04-20 21:24 [PATCH] " H.J. Lu
@ 2025-04-22 9:59 ` Uros Bizjak
[not found] ` <CAFULd4YaLu9Kw9MBSvOv1=YbNoXfMXDbQ-QazD0rp_XjGTjoqw@mail.gmail.com>
0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2025-04-22 9:59 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 4930 bytes --]
On Sun, Apr 20, 2025 at 11:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers. We first find all registers defined by stack or frame
> registers. Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
> PR target/109780
> PR target/109093
> * config/i386/i386.cc (stack_access_data): New.
> (ix86_update_stack_alignment): Likewise.
> (ix86_find_all_reg_use_1): Likewise.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> PR target/109093
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109093-1.c: Likewise.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> * gcc.target/i386/pr109780-3.c: Likewise.
>
> Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
> ---
> gcc/config/i386/i386.cc | 174 ++++++++++++++++++---
> gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++
> gcc/testsuite/gcc.target/i386/pr109093-1.c | 33 ++++
> gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++
> gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++
> gcc/testsuite/gcc.target/i386/pr109780-3.c | 46 ++++++
> 6 files changed, 339 insertions(+), 21 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 28603c2943e..9e4e76857e6 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -8473,6 +8473,103 @@ output_probe_stack_range (rtx reg, rtx end)
> return "";
> }
>
> +/* Data passed to ix86_update_stack_alignment. */
> +struct stack_access_data
> +{
> + /* The stack access register. */
> + const_rtx reg;
> + /* Pointer to stack alignment. */
> + unsigned int *stack_alignment;
> +};
> +
> +/* Update the maximum stack slot alignment from memory alignment in
> + PAT. */
> +
> +static void
> +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> +{
> + /* This insn may reference stack slot. Update the maximum stack slot
> + alignment if the memory is referenced by the stack access register.
> + */
> + stack_access_data *p = (stack_access_data *) data;
> + subrtx_iterator::array_type array;
> + FOR_EACH_SUBRTX (iter, array, pat, ALL)
> + {
> + auto op = *iter;
> + if (GET_CODE (op) == ZERO_EXTEND)
> + op = XEXP (op, 0);
No need for the above, FOR_EACH_SUBRTX will iterate over ZERO_EXTEND.
> + if (MEM_P (op) && reg_mentioned_p (p->reg, op))
> + {
> + unsigned int alignment = MEM_ALIGN (op);
> + if (alignment > *p->stack_alignment)
> + *p->stack_alignment = alignment;
> + break;
> + }
> + }
> +}
> +
> +/* Helper function for ix86_find_all_reg_use. */
> +
> +static void
> +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> + auto_bitmap &worklist)
> +{
> + rtx dest = SET_DEST (set);
> + if (!REG_P (dest))
> + return;
> +
> + rtx src = SET_SRC (set);
> +
> + if (GET_CODE (src) == ZERO_EXTEND)
> + src = XEXP (src, 0);
> +
> + if (MEM_P (src) || CONST_SCALAR_INT_P (src))
> + return;
> +
> + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> + return;
> +
> + /* Add this register to stack_slot_access. */
> + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> + bitmap_set_bit (worklist, REGNO (dest));
> +}
The above should also use FOR_EACH_SUBRTX iterators. Also, the
function will also record non-integer registers, such as flags_reg and
XMM registers as candidates. Please see the attached patch that
implements my proposed version.
> +
> +/* Find all registers defined with REG. */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> + unsigned int reg, auto_bitmap &worklist)
> +{
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
This will still record the whole chain of dependencies. This is firmly
on the safe side, but perhaps is a slight overkill. OTOH, my
implementation of ix86_find_all_reg_use_1 will reject all non-integer
registers, so let's keep it as is.
Can you please test the attached patch?
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5914 bytes --]
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d15f91ddd2c..288dc9d55ff 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8473,6 +8473,116 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference stack slot. Update the maximum stack slot
+ alignment if the memory is referenced by the stack access register. */
+ stack_access_data *p = (stack_access_data *) data;
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ {
+ auto op = *iter;
+ if (MEM_P (op) && reg_mentioned_p (p->reg, op))
+ {
+ unsigned int alignment = MEM_ALIGN (op);
+
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
+ break;
+ }
+ }
+}
+
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, unsigned int regno,
+ HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+
+ if (!REG_P (dest))
+ return;
+
+ unsigned int dst_regno = REGNO (dest);
+
+ if (TEST_HARD_REG_BIT (stack_slot_access, dst_regno))
+ return;
+
+ if (!GENERAL_REGNO_P (dst_regno))
+ return;
+
+ rtx src = SET_SRC (set);
+
+ subrtx_var_iterator::array_type array;
+ FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
+ {
+ auto op = *iter;
+
+ if (MEM_P (op))
+ iter.skip_subrtxes ();
+
+ if (REG_P (op) && REGNO (op) == regno)
+ {
+ /* Add this register to stack_slot_access. */
+ add_to_hard_reg_set (&stack_slot_access, Pmode, dst_regno);
+ bitmap_set_bit (worklist, dst_regno);
+ }
+ }
+}
+
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, DF_REF_REGNO (ref),
+ stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_use_1 (exp, DF_REF_REGNO (ref),
+ stack_slot_access, worklist);
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8491,10 +8601,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8506,27 +8612,66 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int reg;
+
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ stack_access_data data = { nullptr, &stack_alignment };
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-04-28 13:11 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-24 20:35 [PATCH v3] x86: Properly find the maximum stack slot alignment H.J. Lu
2023-07-25 11:27 ` Richard Biener
2025-02-13 8:31 [PATCH v2] " H.J. Lu
2025-02-13 9:17 ` Uros Bizjak
2025-02-14 3:56 ` [PATCH v3] " H.J. Lu
2025-02-14 13:11 ` Uros Bizjak
2025-02-14 14:08 ` Richard Biener
2025-02-15 0:27 ` H.J. Lu
2025-02-20 7:08 ` Uros Bizjak
2025-02-17 10:45 ` Uros Bizjak
2025-04-20 21:24 [PATCH] " H.J. Lu
2025-04-22 9:59 ` Uros Bizjak
[not found] ` <CAFULd4YaLu9Kw9MBSvOv1=YbNoXfMXDbQ-QazD0rp_XjGTjoqw@mail.gmail.com>
[not found] ` <CAMe9rOqEc39pUm=_Z-T=BrKWiukJpah-NkSLWskoWUy9xx40Bg@mail.gmail.com>
[not found] ` <CAFULd4bdVVnH3BEtAwmuGst+xKeqahrKzPEdkgbLJo0-vVMwZg@mail.gmail.com>
2025-04-28 0:03 ` [PATCH v3] " H.J. Lu
2025-04-28 13:10 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).