public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Richard Biener <richard.guenther@gmail.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH v2] x86: Properly find the maximum stack slot alignment
Date: Mon, 24 Jul 2023 11:12:27 -0700	[thread overview]
Message-ID: <CAMe9rOoH0RrwMM069mZc2nL7MzbmcNfNU++MFkB8OxA05c7O6Q@mail.gmail.com> (raw)
In-Reply-To: <CAFiYyc2+wo2sj=DFBmNY0r5tg8SYe-B5F4ZVVsWDuki7maM4JA@mail.gmail.com>

On Mon, Jul 10, 2023 at 3:32 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Fri, Jul 7, 2023 at 5:14 PM H.J. Lu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Don't assume that stack slots can only be accessed by stack or frame
> > registers.  We first find all registers defined by stack or frame
> > registers.  Then check memory accesses by such registers, including
> > stack and frame registers.
> >
> > gcc/
> >
> >         PR target/109780
> >         * config/i386/i386.cc (ix86_update_stack_alignment): New.
> >         (ix86_find_all_reg_use): Likewise.
> >         (ix86_find_max_used_stack_alignment): Also check memory accesses
> >         from registers defined by stack or frame registers.
> >
> > gcc/testsuite/
> >
> >         PR target/109780
> >         * g++.target/i386/pr109780-1.C: New test.
> >         * gcc.target/i386/pr109780-1.c: Likewise.
> >         * gcc.target/i386/pr109780-2.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc                    | 120 +++++++++++++++++----
> >  gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 +++
> >  gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 ++++
> >  4 files changed, 206 insertions(+), 21 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index caca74d6dec..27f349b0ccb 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end)
> >    return "";
> >  }
> >
> > +/* Update the maximum stack slot alignment from memory alignment in
> > +   PAT.  */
> > +
> > +static void
> > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> > +{
> > +  /* This insn may reference stack slot.  Update the maximum stack slot
> > +     alignment.  */
> > +  subrtx_iterator::array_type array;
> > +  FOR_EACH_SUBRTX (iter, array, pat, ALL)
> > +    if (MEM_P (*iter))
> > +      {
> > +       unsigned int alignment = MEM_ALIGN (*iter);
> > +       unsigned int *stack_alignment
> > +         = (unsigned int *) data;
> > +       if (alignment > *stack_alignment)
> > +         *stack_alignment = alignment;
> > +       break;
> > +      }
> > +}
> > +
> > +/* Find all registers defined with REG.  */
> > +
> > +static void
> > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg)
> > +{
> > +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > +       ref != NULL;
> > +       ref = DF_REF_NEXT_REG (ref))
> > +    {
> > +      if (DF_REF_IS_ARTIFICIAL (ref))
> > +       continue;
> > +
> > +      rtx_insn *insn = DF_REF_INSN (ref);
> > +      if (!NONDEBUG_INSN_P (insn))
> > +       continue;
> > +
> > +      rtx set = single_set (insn);
> > +      if (!set)
> > +       continue;
> > +
> > +      rtx src = SET_SRC (set);
> > +      if (MEM_P (src))
> > +       continue;
> > +
> > +      rtx dest = SET_DEST (set);
> > +      if (!REG_P (dest))
> > +       continue;
> > +
> > +      if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> > +       continue;
> > +
> > +      /* Add this register to stack_slot_access.  */
> > +      add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> > +    }
> > +}
> > +
> >  /* Set stack_frame_required to false if stack frame isn't required.
> >     Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
> >     slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
> > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >    add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> >                        HARD_FRAME_POINTER_REGNUM);
> >
> > -  /* The preferred stack alignment is the minimum stack alignment.  */
> > -  if (stack_alignment > crtl->preferred_stack_boundary)
> > -    stack_alignment = crtl->preferred_stack_boundary;
> > -
> >    bool require_stack_frame = false;
> >
> >    FOR_EACH_BB_FN (bb, cfun)
> > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >                                        set_up_by_prologue))
> >           {
> >             require_stack_frame = true;
> > -
> > -           if (check_stack_slot)
> > -             {
> > -               /* Find the maximum stack alignment.  */
> > -               subrtx_iterator::array_type array;
> > -               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> > -                 if (MEM_P (*iter)
> > -                     && (reg_mentioned_p (stack_pointer_rtx,
> > -                                          *iter)
> > -                         || reg_mentioned_p (frame_pointer_rtx,
> > -                                             *iter)))
> > -                   {
> > -                     unsigned int alignment = MEM_ALIGN (*iter);
> > -                     if (alignment > stack_alignment)
> > -                       stack_alignment = alignment;
> > -                   }
> > -             }
> > +           break;
> >           }
> >      }
> >
> >    cfun->machine->stack_frame_required = require_stack_frame;
> > +
> > +  /* Stop if we don't need to check stack slot.  */
> > +  if (!check_stack_slot)
> > +    return;
> > +
> > +  /* The preferred stack alignment is the minimum stack alignment.  */
> > +  if (stack_alignment > crtl->preferred_stack_boundary)
> > +    stack_alignment = crtl->preferred_stack_boundary;
> > +
> > +  HARD_REG_SET stack_slot_access;
> > +  CLEAR_HARD_REG_SET (stack_slot_access);
> > +
> > +  /* Stack slot can be accessed by stack pointer, frame pointer or
> > +     registers defined by stack pointer or frame pointer.  */
> > +  add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +                      STACK_POINTER_REGNUM);
>
> I'm possibly missing the point, but wasn't the purpose of the change to detect
> "indirect" uses of the stack pointer?  This now handles two levels, thus
> *sp and tem = sp + 4; *tem, but it doesn't seem to handle tem2 = tem + 4;
> *tem2?  The previous patch did I think, but its compile-time complexity wasn't
> sound.
>
> I think the ix86_find_all_reg_use wants to wrap itself inside sth like
>
>   auto_bitmap worklist;
>
>   bitmap_set_bit (worklist, reg);
>   do
>      {
>         int reg = bitmap_clear_first_set_bit (worklist);
>         /* it's current processing of reg */
>         when it adds to stack_slot_access also add to worklist
>      }
>   while (!bitmap_empty_p (worklist));

Will fix.

> > +  ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM);
> > +  if (frame_pointer_needed)
> > +    {
> > +      add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +                          HARD_FRAME_POINTER_REGNUM);
> > +      ix86_find_all_reg_use (stack_slot_access,
> > +                            HARD_FRAME_POINTER_REGNUM);
> > +    }
> > +
> > +  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +       && TEST_HARD_REG_BIT (stack_slot_access, i))
>
> You can as well iterate over the stack_slot_access regset here?

Will fix.

Thanks.

> > +      for (df_ref ref = DF_REG_USE_CHAIN (i);
> > +          ref != NULL;
> > +          ref = DF_REF_NEXT_REG (ref))
> > +       {
> > +         if (DF_REF_IS_ARTIFICIAL (ref))
> > +           continue;
> > +
> > +         rtx_insn *insn = DF_REF_INSN (ref);
> > +         if (!NONDEBUG_INSN_P (insn))
> > +           continue;
> > +         note_stores (insn, ix86_update_stack_alignment,
> > +                      &stack_alignment);
> > +       }
> >  }
> >
> >  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
> > new file mode 100644
> > index 00000000000..7e3eabdec94
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
> > @@ -0,0 +1,72 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target c++17 } */
> > +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
> > +
> > +template <typename _Tp> struct remove_reference {
> > +  using type = __remove_reference(_Tp);
> > +};
> > +template <typename T> struct MaybeStorageBase {
> > +  T val;
> > +  struct Union {
> > +    ~Union();
> > +  } mStorage;
> > +};
> > +template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
> > +  char mIsSome;
> > +};
> > +template <typename T, typename U = typename remove_reference<T>::type>
> > +constexpr MaybeStorage<U> Some(T &&);
> > +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
> > +  return {aValue};
> > +}
> > +template <class> struct Span {
> > +  int operator[](long idx) {
> > +    int *__trans_tmp_4;
> > +    if (__builtin_expect(idx, 0))
> > +      *(int *)__null = false;
> > +    __trans_tmp_4 = storage_.data();
> > +    return __trans_tmp_4[idx];
> > +  }
> > +  struct {
> > +    int *data() { return data_; }
> > +    int *data_;
> > +  } storage_;
> > +};
> > +struct Variant {
> > +  template <typename RefT> Variant(RefT) {}
> > +};
> > +long from_i, from___trans_tmp_9;
> > +namespace js::intl {
> > +struct DecimalNumber {
> > +  Variant string_;
> > +  unsigned long significandStart_;
> > +  unsigned long significandEnd_;
> > +  bool zero_ = false;
> > +  bool negative_;
> > +  template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
> > +  template <typename CharT>
> > +  static MaybeStorage<DecimalNumber> from(Span<const CharT>);
> > +  void from();
> > +};
> > +} // namespace js::intl
> > +void js::intl::DecimalNumber::from() {
> > +  Span<const char16_t> __trans_tmp_3;
> > +  from(__trans_tmp_3);
> > +}
> > +template <typename CharT>
> > +MaybeStorage<js::intl::DecimalNumber>
> > +js::intl::DecimalNumber::from(Span<const CharT> chars) {
> > +  DecimalNumber number(chars);
> > +  if (auto ch = chars[from_i]) {
> > +    from_i++;
> > +    number.negative_ = ch == '-';
> > +  }
> > +  while (from___trans_tmp_9 && chars[from_i])
> > +    ;
> > +  if (chars[from_i])
> > +    while (chars[from_i - 1])
> > +      number.zero_ = true;
> > +  return Some(number);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> > new file mode 100644
> > index 00000000000..6b06947f2a5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=skylake" } */
> > +
> > +char perm[64];
> > +
> > +void
> > +__attribute__((noipa))
> > +foo (int n)
> > +{
> > +  for (int i = 0; i < n; ++i)
> > +    perm[i] = i;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> > new file mode 100644
> > index 00000000000..152da06c6ad
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=skylake" } */
> > +
> > +#define N 9
> > +
> > +void
> > +f (double x, double y, double *res)
> > +{
> > +  y = -y;
> > +  for (int i = 0; i < N; ++i)
> > +    {
> > +      double tmp = y;
> > +      y = x;
> > +      x = tmp;
> > +      res[i] = i;
> > +    }
> > +  res[N] = y * y;
> > +  res[N + 1] = x;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > --
> > 2.41.0
> >



-- 
H.J.

      reply	other threads:[~2023-07-24 18:13 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-07 15:13 H.J. Lu
2023-07-10 10:32 ` Richard Biener
2023-07-24 18:12   ` H.J. Lu [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMe9rOoH0RrwMM069mZc2nL7MzbmcNfNU++MFkB8OxA05c7O6Q@mail.gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=richard.guenther@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).