From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lj1-x235.google.com (mail-lj1-x235.google.com [IPv6:2a00:1450:4864:20::235]) by sourceware.org (Postfix) with ESMTPS id 325AD3858D32 for ; Mon, 10 Jul 2023 10:32:25 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 325AD3858D32 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-lj1-x235.google.com with SMTP id 38308e7fff4ca-2b6b98ac328so63493971fa.0 for ; Mon, 10 Jul 2023 03:32:25 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1688985144; x=1691577144; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=YjUitv29oJVUN6nH3PHpdqetB1DtGDbJrCxXl+nbsZI=; b=l4KN4MMH7vF82Iu4mOvU51Khk911m9No+DVkII5Sm1ITVMpXgsR5VKr7HV1+Si12QB 9U5UAoudqxw9BDMzentTPU+B0B5Ag672EXTrWip/eQyHJrWr3Mgmw8FiZeos2XuI3xAK Xsg0OivWzyXDFdFTfBH5zIGbwLYuNwc110uMdbr8/02kSWveV3KhR1raf2UsPtH9D+WS 3uZA7q6PBb7MQqbeH6PdPaTDTqnbPIKj/lG2Na+XDjbZ+ys1VWIKSI03iMF2JrtU1zwG 8bR7xT/Hir987uwUAe3EpOsSNEyEvoNz/iLmjP3aBQLBDvmzmGN5UlaorQ1tPV+05URg 1xGw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1688985144; x=1691577144; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=YjUitv29oJVUN6nH3PHpdqetB1DtGDbJrCxXl+nbsZI=; b=K5nwFh+zT5ZYlAGzbAHu8/mOHs+MxbUIY76GbC2BIAFyGpPXxAA2H1FzWs0WcJzLZu gtoOeznsznw+7zQupQ/7was9sBMTqVzaKNFS2MNhc8jdbupKY6yrC336xtCJlXnc4l6q 675Z7Xw59y60JlwxKU5t5pegROHX0e22tdYyPCcOXcUDewxsqDHS4eF/Sc7luKzMALzj JIZ0/oobmt/QFSsBcWZnvoYTqORR1qGq4zv37cz+f7KeVya8APr0KSqZOFLZmOg/rNWv IF39V98bjYdCS0SkweDkDWc4xhI2h89bFAHzLmYxnnjKa5YO7sLWVfMAjaSjihVbLi1a MX8w== X-Gm-Message-State: ABy/qLZ5h5yrlkfRLfayUgfd7EqpKDQgZ2YIViD6D3i8yJTk+T9DAIT0 JZlf+odUsznlXeDi8FgYKdo1JkSpT0xJFkTBGHmhpl5+0bw= X-Google-Smtp-Source: APBJJlGm4Qbq3bCrC04h2QFRjIrRYS3lA6ogJfFf0nKdKIXQ1Tz/B/XWIhAt9cNePEKszY3EMk33y9YD1aZMsQNUXhk= X-Received: by 2002:a2e:9104:0:b0:2b6:e7c7:b039 with SMTP id m4-20020a2e9104000000b002b6e7c7b039mr9425070ljg.28.1688985143371; Mon, 10 Jul 2023 03:32:23 -0700 (PDT) MIME-Version: 1.0 References: <20230707151336.691534-1-hjl.tools@gmail.com> In-Reply-To: <20230707151336.691534-1-hjl.tools@gmail.com> From: Richard Biener Date: Mon, 10 Jul 2023 12:32:03 +0200 Message-ID: Subject: Re: [PATCH v2] x86: Properly find the maximum stack slot alignment To: "H.J. Lu" Cc: gcc-patches@gcc.gnu.org Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-7.7 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On Fri, Jul 7, 2023 at 5:14=E2=80=AFPM H.J. Lu via Gcc-patches wrote: > > Don't assume that stack slots can only be accessed by stack or frame > registers. We first find all registers defined by stack or frame > registers. Then check memory accesses by such registers, including > stack and frame registers. > > gcc/ > > PR target/109780 > * config/i386/i386.cc (ix86_update_stack_alignment): New. > (ix86_find_all_reg_use): Likewise. > (ix86_find_max_used_stack_alignment): Also check memory accesses > from registers defined by stack or frame registers. > > gcc/testsuite/ > > PR target/109780 > * g++.target/i386/pr109780-1.C: New test. > * gcc.target/i386/pr109780-1.c: Likewise. > * gcc.target/i386/pr109780-2.c: Likewise. > --- > gcc/config/i386/i386.cc | 120 +++++++++++++++++---- > gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++++++ > gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++ > gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++ > 4 files changed, 206 insertions(+), 21 deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index caca74d6dec..27f349b0ccb 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) > return ""; > } > > +/* Update the maximum stack slot alignment from memory alignment in > + PAT. */ > + > +static void > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > +{ > + /* This insn may reference stack slot. Update the maximum stack slot > + alignment. */ > + subrtx_iterator::array_type array; > + FOR_EACH_SUBRTX (iter, array, pat, ALL) > + if (MEM_P (*iter)) > + { > + unsigned int alignment =3D MEM_ALIGN (*iter); > + unsigned int *stack_alignment > + =3D (unsigned int *) data; > + if (alignment > *stack_alignment) > + *stack_alignment =3D alignment; > + break; > + } > +} > + > +/* Find all registers defined with REG. */ > + > +static void > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) > +{ > + for (df_ref ref =3D DF_REG_USE_CHAIN (reg); > + ref !=3D NULL; > + ref =3D DF_REF_NEXT_REG (ref)) > + { > + if (DF_REF_IS_ARTIFICIAL (ref)) > + continue; > + > + rtx_insn *insn =3D DF_REF_INSN (ref); > + if (!NONDEBUG_INSN_P (insn)) > + continue; > + > + rtx set =3D single_set (insn); > + if (!set) > + continue; > + > + rtx src =3D SET_SRC (set); > + if (MEM_P (src)) > + continue; > + > + rtx dest =3D SET_DEST (set); > + if (!REG_P (dest)) > + continue; > + > + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) > + continue; > + > + /* Add this register to stack_slot_access. */ > + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); > + } > +} > + > /* Set stack_frame_required to false if stack frame isn't required. > Update STACK_ALIGNMENT to the largest alignment, in bits, of stack > slot used if stack frame is required and CHECK_STACK_SLOT is true. *= / > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &= stack_alignment, > add_to_hard_reg_set (&set_up_by_prologue, Pmode, > HARD_FRAME_POINTER_REGNUM); > > - /* The preferred stack alignment is the minimum stack alignment. */ > - if (stack_alignment > crtl->preferred_stack_boundary) > - stack_alignment =3D crtl->preferred_stack_boundary; > - > bool require_stack_frame =3D false; > > FOR_EACH_BB_FN (bb, cfun) > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int = &stack_alignment, > set_up_by_prologue)) > { > require_stack_frame =3D true; > - > - if (check_stack_slot) > - { > - /* Find the maximum stack alignment. */ > - subrtx_iterator::array_type array; > - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) > - if (MEM_P (*iter) > - && (reg_mentioned_p (stack_pointer_rtx, > - *iter) > - || reg_mentioned_p (frame_pointer_rtx, > - *iter))) > - { > - unsigned int alignment =3D MEM_ALIGN (*iter); > - if (alignment > stack_alignment) > - stack_alignment =3D alignment; > - } > - } > + break; > } > } > > cfun->machine->stack_frame_required =3D require_stack_frame; > + > + /* Stop if we don't need to check stack slot. */ > + if (!check_stack_slot) > + return; > + > + /* The preferred stack alignment is the minimum stack alignment. */ > + if (stack_alignment > crtl->preferred_stack_boundary) > + stack_alignment =3D crtl->preferred_stack_boundary; > + > + HARD_REG_SET stack_slot_access; > + CLEAR_HARD_REG_SET (stack_slot_access); > + > + /* Stack slot can be accessed by stack pointer, frame pointer or > + registers defined by stack pointer or frame pointer. */ > + add_to_hard_reg_set (&stack_slot_access, Pmode, > + STACK_POINTER_REGNUM); I'm possibly missing the point, but wasn't the purpose of the change to det= ect "indirect" uses of the stack pointer? This now handles two levels, thus *sp and tem =3D sp + 4; *tem, but it doesn't seem to handle tem2 =3D tem + = 4; *tem2? The previous patch did I think, but its compile-time complexity was= n't sound. I think the ix86_find_all_reg_use wants to wrap itself inside sth like auto_bitmap worklist; bitmap_set_bit (worklist, reg); do { int reg =3D bitmap_clear_first_set_bit (worklist); /* it's current processing of reg */ when it adds to stack_slot_access also add to worklist } while (!bitmap_empty_p (worklist)); > + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); > + if (frame_pointer_needed) > + { > + add_to_hard_reg_set (&stack_slot_access, Pmode, > + HARD_FRAME_POINTER_REGNUM); > + ix86_find_all_reg_use (stack_slot_access, > + HARD_FRAME_POINTER_REGNUM); > + } > + > + for (int i =3D 0; i < FIRST_PSEUDO_REGISTER; i++) > + if (GENERAL_REGNO_P (i) > + && TEST_HARD_REG_BIT (stack_slot_access, i)) You can as well iterate over the stack_slot_access regset here? > + for (df_ref ref =3D DF_REG_USE_CHAIN (i); > + ref !=3D NULL; > + ref =3D DF_REF_NEXT_REG (ref)) > + { > + if (DF_REF_IS_ARTIFICIAL (ref)) > + continue; > + > + rtx_insn *insn =3D DF_REF_INSN (ref); > + if (!NONDEBUG_INSN_P (insn)) > + continue; > + note_stores (insn, ix86_update_stack_alignment, > + &stack_alignment); > + } > } > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g= ++.target/i386/pr109780-1.C > new file mode 100644 > index 00000000000..7e3eabdec94 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C > @@ -0,0 +1,72 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target c++17 } */ > +/* { dg-options "-O2 -mavx2 -mtune=3Dhaswell" } */ > + > +template struct remove_reference { > + using type =3D __remove_reference(_Tp); > +}; > +template struct MaybeStorageBase { > + T val; > + struct Union { > + ~Union(); > + } mStorage; > +}; > +template struct MaybeStorage : MaybeStorageBase { > + char mIsSome; > +}; > +template ::type> > +constexpr MaybeStorage Some(T &&); > +template constexpr MaybeStorage Some(T &&aVa= lue) { > + return {aValue}; > +} > +template struct Span { > + int operator[](long idx) { > + int *__trans_tmp_4; > + if (__builtin_expect(idx, 0)) > + *(int *)__null =3D false; > + __trans_tmp_4 =3D storage_.data(); > + return __trans_tmp_4[idx]; > + } > + struct { > + int *data() { return data_; } > + int *data_; > + } storage_; > +}; > +struct Variant { > + template Variant(RefT) {} > +}; > +long from_i, from___trans_tmp_9; > +namespace js::intl { > +struct DecimalNumber { > + Variant string_; > + unsigned long significandStart_; > + unsigned long significandEnd_; > + bool zero_ =3D false; > + bool negative_; > + template DecimalNumber(CharT string) : string_(string= ) {} > + template > + static MaybeStorage from(Span); > + void from(); > +}; > +} // namespace js::intl > +void js::intl::DecimalNumber::from() { > + Span __trans_tmp_3; > + from(__trans_tmp_3); > +} > +template > +MaybeStorage > +js::intl::DecimalNumber::from(Span chars) { > + DecimalNumber number(chars); > + if (auto ch =3D chars[from_i]) { > + from_i++; > + number.negative_ =3D ch =3D=3D '-'; > + } > + while (from___trans_tmp_9 && chars[from_i]) > + ; > + if (chars[from_i]) > + while (chars[from_i - 1]) > + number.zero_ =3D true; > + return Some(number); > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" = } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/g= cc.target/i386/pr109780-1.c > new file mode 100644 > index 00000000000..6b06947f2a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=3Dskylake" } */ > + > +char perm[64]; > + > +void > +__attribute__((noipa)) > +foo (int n) > +{ > + for (int i =3D 0; i < n; ++i) > + perm[i] =3D i; > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" = } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/g= cc.target/i386/pr109780-2.c > new file mode 100644 > index 00000000000..152da06c6ad > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=3Dskylake" } */ > + > +#define N 9 > + > +void > +f (double x, double y, double *res) > +{ > + y =3D -y; > + for (int i =3D 0; i < N; ++i) > + { > + double tmp =3D y; > + y =3D x; > + x =3D tmp; > + res[i] =3D i; > + } > + res[N] =3D y * y; > + res[N + 1] =3D x; > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" = } } */ > -- > 2.41.0 >