From: Richard Biener <richard.guenther@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH v3] x86: Properly find the maximum stack slot alignment
Date: Tue, 25 Jul 2023 13:27:32 +0200 [thread overview]
Message-ID: <CAFiYyc3TDOPdfff041jg8jaFtx=R7Obv518AtQr=SAH3qiEUVA@mail.gmail.com> (raw)
In-Reply-To: <20230724203536.40091-1-hjl.tools@gmail.com>
On Mon, Jul 24, 2023 at 10:36 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers. We first find all registers defined by stack or frame
> registers. Then check memory accesses by such registers, including
> stack and frame registers.
Looks good to me from an algorithmic/DF perspective - I'm leaving it for
x86 maintainers to assess the functional bits.
Thanks,
Richard.
> gcc/
>
> PR target/109780
> * config/i386/i386.cc (ix86_update_stack_alignment): New.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> ---
> gcc/config/i386/i386.cc | 128 +++++++++++++++++----
> gcc/testsuite/g++.target/i386/pr109780-1.C | 72 ++++++++++++
> gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++
> gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++
> 4 files changed, 214 insertions(+), 21 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index caca74d6dec..b71fd9401ef 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -8084,6 +8084,65 @@ output_probe_stack_range (rtx reg, rtx end)
> return "";
> }
>
> +/* Update the maximum stack slot alignment from memory alignment in
> + PAT. */
> +
> +static void
> +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> +{
> + /* This insn may reference stack slot. Update the maximum stack slot
> + alignment. */
> + subrtx_iterator::array_type array;
> + FOR_EACH_SUBRTX (iter, array, pat, ALL)
> + if (MEM_P (*iter))
> + {
> + unsigned int alignment = MEM_ALIGN (*iter);
> + unsigned int *stack_alignment
> + = (unsigned int *) data;
> + if (alignment > *stack_alignment)
> + *stack_alignment = alignment;
> + break;
> + }
> +}
> +
> +/* Find all registers defined with REG. */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> + unsigned int reg, auto_bitmap &worklist)
> +{
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
> + if (!NONDEBUG_INSN_P (insn))
> + continue;
> +
> + rtx set = single_set (insn);
> + if (!set)
> + continue;
> +
> + rtx src = SET_SRC (set);
> + if (MEM_P (src))
> + continue;
> +
> + rtx dest = SET_DEST (set);
> + if (!REG_P (dest))
> + continue;
> +
> + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> + continue;
> +
> + /* Add this register to stack_slot_access. */
> + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> + bitmap_set_bit (worklist, REGNO (dest));
> + }
> +}
> +
> /* Set stack_frame_required to false if stack frame isn't required.
> Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
> slot used if stack frame is required and CHECK_STACK_SLOT is true. */
> @@ -8102,10 +8161,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> HARD_FRAME_POINTER_REGNUM);
>
> - /* The preferred stack alignment is the minimum stack alignment. */
> - if (stack_alignment > crtl->preferred_stack_boundary)
> - stack_alignment = crtl->preferred_stack_boundary;
> -
> bool require_stack_frame = false;
>
> FOR_EACH_BB_FN (bb, cfun)
> @@ -8117,27 +8172,58 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> set_up_by_prologue))
> {
> require_stack_frame = true;
> -
> - if (check_stack_slot)
> - {
> - /* Find the maximum stack alignment. */
> - subrtx_iterator::array_type array;
> - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> - if (MEM_P (*iter)
> - && (reg_mentioned_p (stack_pointer_rtx,
> - *iter)
> - || reg_mentioned_p (frame_pointer_rtx,
> - *iter)))
> - {
> - unsigned int alignment = MEM_ALIGN (*iter);
> - if (alignment > stack_alignment)
> - stack_alignment = alignment;
> - }
> - }
> + break;
> }
> }
>
> cfun->machine->stack_frame_required = require_stack_frame;
> +
> + /* Stop if we don't need to check stack slot. */
> + if (!check_stack_slot)
> + return;
> +
> + /* The preferred stack alignment is the minimum stack alignment. */
> + if (stack_alignment > crtl->preferred_stack_boundary)
> + stack_alignment = crtl->preferred_stack_boundary;
> +
> + HARD_REG_SET stack_slot_access;
> + CLEAR_HARD_REG_SET (stack_slot_access);
> +
> + /* Stack slot can be accessed by stack pointer, frame pointer or
> + registers defined by stack pointer or frame pointer. */
> + auto_bitmap worklist;
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + STACK_POINTER_REGNUM);
> + bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> + if (frame_pointer_needed)
> + {
> + add_to_hard_reg_set (&stack_slot_access, Pmode,
> + HARD_FRAME_POINTER_REGNUM);
> + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> + }
> + unsigned int reg;
> + do
> + {
> + reg = bitmap_clear_first_set_bit (worklist);
> + ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> + }
> + while (!bitmap_empty_p (worklist));
> +
> + hard_reg_set_iterator hrsi;
> + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> + for (df_ref ref = DF_REG_USE_CHAIN (reg);
> + ref != NULL;
> + ref = DF_REF_NEXT_REG (ref))
> + {
> + if (DF_REF_IS_ARTIFICIAL (ref))
> + continue;
> +
> + rtx_insn *insn = DF_REF_INSN (ref);
> + if (!NONDEBUG_INSN_P (insn))
> + continue;
> + note_stores (insn, ix86_update_stack_alignment,
> + &stack_alignment);
> + }
> }
>
> /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
> new file mode 100644
> index 00000000000..7e3eabdec94
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
> @@ -0,0 +1,72 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target c++17 } */
> +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
> +
> +template <typename _Tp> struct remove_reference {
> + using type = __remove_reference(_Tp);
> +};
> +template <typename T> struct MaybeStorageBase {
> + T val;
> + struct Union {
> + ~Union();
> + } mStorage;
> +};
> +template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
> + char mIsSome;
> +};
> +template <typename T, typename U = typename remove_reference<T>::type>
> +constexpr MaybeStorage<U> Some(T &&);
> +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
> + return {aValue};
> +}
> +template <class> struct Span {
> + int operator[](long idx) {
> + int *__trans_tmp_4;
> + if (__builtin_expect(idx, 0))
> + *(int *)__null = false;
> + __trans_tmp_4 = storage_.data();
> + return __trans_tmp_4[idx];
> + }
> + struct {
> + int *data() { return data_; }
> + int *data_;
> + } storage_;
> +};
> +struct Variant {
> + template <typename RefT> Variant(RefT) {}
> +};
> +long from_i, from___trans_tmp_9;
> +namespace js::intl {
> +struct DecimalNumber {
> + Variant string_;
> + unsigned long significandStart_;
> + unsigned long significandEnd_;
> + bool zero_ = false;
> + bool negative_;
> + template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
> + template <typename CharT>
> + static MaybeStorage<DecimalNumber> from(Span<const CharT>);
> + void from();
> +};
> +} // namespace js::intl
> +void js::intl::DecimalNumber::from() {
> + Span<const char16_t> __trans_tmp_3;
> + from(__trans_tmp_3);
> +}
> +template <typename CharT>
> +MaybeStorage<js::intl::DecimalNumber>
> +js::intl::DecimalNumber::from(Span<const CharT> chars) {
> + DecimalNumber number(chars);
> + if (auto ch = chars[from_i]) {
> + from_i++;
> + number.negative_ = ch == '-';
> + }
> + while (from___trans_tmp_9 && chars[from_i])
> + ;
> + if (chars[from_i])
> + while (chars[from_i - 1])
> + number.zero_ = true;
> + return Some(number);
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> new file mode 100644
> index 00000000000..6b06947f2a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +char perm[64];
> +
> +void
> +__attribute__((noipa))
> +foo (int n)
> +{
> + for (int i = 0; i < n; ++i)
> + perm[i] = i;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> new file mode 100644
> index 00000000000..152da06c6ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +#define N 9
> +
> +void
> +f (double x, double y, double *res)
> +{
> + y = -y;
> + for (int i = 0; i < N; ++i)
> + {
> + double tmp = y;
> + y = x;
> + x = tmp;
> + res[i] = i;
> + }
> + res[N] = y * y;
> + res[N + 1] = x;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> --
> 2.41.0
>
prev parent reply other threads:[~2023-07-25 11:28 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-24 20:35 H.J. Lu
2023-07-25 11:27 ` Richard Biener [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAFiYyc3TDOPdfff041jg8jaFtx=R7Obv518AtQr=SAH3qiEUVA@mail.gmail.com' \
--to=richard.guenther@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hjl.tools@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).