From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-yw1-x1135.google.com (mail-yw1-x1135.google.com [IPv6:2607:f8b0:4864:20::1135]) by sourceware.org (Postfix) with ESMTPS id 26AD43858C5E for ; Mon, 24 Jul 2023 18:13:05 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 26AD43858C5E Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-yw1-x1135.google.com with SMTP id 00721157ae682-584034c706dso10534307b3.1 for ; Mon, 24 Jul 2023 11:13:05 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1690222384; x=1690827184; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=yjlfQs2YWqFNx1Jg4tEPzYbMX5a9tyCOJJhGgQIGGBo=; b=ACKGAZnKfNIwfI5nbcC3oKuossP2/kosekYca6RfhNlXVJw/11iwApvdjH8PLs86Ua 47SxnYvtXuvLNsVikc3kL15y3fAowV4s/4BLhBp/v8UulbQk0Stc2OipMGsCacemsjzW FhhhbcFr3xNuqoIQb9LsiffxE7J7AePtTOzXSBs4I7tfp3Cn7CbfOEV/+MvdIz948oF/ k8GLjY3gvGgn0b5QiiAnGX6B5ewY0f/Et+4CggB/+13+4iX3Wo7rHzPoBGh6/U4ak1kH dngo0iFxHsvN7KlgRuZ5z1g+wLFN6mVtWTsvChEhZJM7dq8Jbz32MR6iQtwRYAjsg2z4 vLQA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1690222384; x=1690827184; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=yjlfQs2YWqFNx1Jg4tEPzYbMX5a9tyCOJJhGgQIGGBo=; b=fdbP2+wsTr0/wYmZB9llYMMPiAACKtdyyKwrANH9/hiqbsqLP8Y845cWe9RjfJPW0K epYm0xzMS5CjAUlBTNus4GEcqLrok2rsMHcr21do7tW4kq5cwykIxzKwvdio3Hr9ChtD IyZAAlydLxEIKtlZheQiH4Xux+FpHHX9siZkvIxZdfczun6eJpVvVrH4sWTwodlhkzlP cYwDazFWQyOh4aqlqW0z941xoWRPI+fA7Bm0clRwhZBTts9/peh0UF97szneXsk7krFL TBQsIEPSHgt+Hk6RlXSjvgDvRlRfRbbs6Z3szxKggiB62m/A/2C1A84uV7nMPAWjwMBQ fWZw== X-Gm-Message-State: ABy/qLbg4tIjmiT+2rZdjE7eUYwGUfRI1pxrodKtKDJcbLSULsuk98+b OPMeRGkbTMCdSxewMsXP/DlEQsaxn1vb73IRZnY= X-Google-Smtp-Source: APBJJlFXD7YLTk1rwNqJj30n+X24p7J8qaVPugADmahSmmGxkOr9RKqFJdJPKil1PkJEkryVZRJFEfuRNrVV4evtm68= X-Received: by 2002:a0d:c083:0:b0:583:b802:c004 with SMTP id b125-20020a0dc083000000b00583b802c004mr5657378ywd.40.1690222383825; Mon, 24 Jul 2023 11:13:03 -0700 (PDT) MIME-Version: 1.0 References: <20230707151336.691534-1-hjl.tools@gmail.com> In-Reply-To: From: "H.J. Lu" Date: Mon, 24 Jul 2023 11:12:27 -0700 Message-ID: Subject: Re: [PATCH v2] x86: Properly find the maximum stack slot alignment To: Richard Biener Cc: gcc-patches@gcc.gnu.org Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-3022.2 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On Mon, Jul 10, 2023 at 3:32=E2=80=AFAM Richard Biener wrote: > > On Fri, Jul 7, 2023 at 5:14=E2=80=AFPM H.J. Lu via Gcc-patches > wrote: > > > > Don't assume that stack slots can only be accessed by stack or frame > > registers. We first find all registers defined by stack or frame > > registers. Then check memory accesses by such registers, including > > stack and frame registers. > > > > gcc/ > > > > PR target/109780 > > * config/i386/i386.cc (ix86_update_stack_alignment): New. > > (ix86_find_all_reg_use): Likewise. > > (ix86_find_max_used_stack_alignment): Also check memory accesse= s > > from registers defined by stack or frame registers. > > > > gcc/testsuite/ > > > > PR target/109780 > > * g++.target/i386/pr109780-1.C: New test. > > * gcc.target/i386/pr109780-1.c: Likewise. > > * gcc.target/i386/pr109780-2.c: Likewise. > > --- > > gcc/config/i386/i386.cc | 120 +++++++++++++++++---- > > gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++++++ > > gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++ > > gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++ > > 4 files changed, 206 insertions(+), 21 deletions(-) > > create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C > > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index caca74d6dec..27f349b0ccb 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) > > return ""; > > } > > > > +/* Update the maximum stack slot alignment from memory alignment in > > + PAT. */ > > + > > +static void > > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > > +{ > > + /* This insn may reference stack slot. Update the maximum stack slo= t > > + alignment. */ > > + subrtx_iterator::array_type array; > > + FOR_EACH_SUBRTX (iter, array, pat, ALL) > > + if (MEM_P (*iter)) > > + { > > + unsigned int alignment =3D MEM_ALIGN (*iter); > > + unsigned int *stack_alignment > > + =3D (unsigned int *) data; > > + if (alignment > *stack_alignment) > > + *stack_alignment =3D alignment; > > + break; > > + } > > +} > > + > > +/* Find all registers defined with REG. */ > > + > > +static void > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) > > +{ > > + for (df_ref ref =3D DF_REG_USE_CHAIN (reg); > > + ref !=3D NULL; > > + ref =3D DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn =3D DF_REF_INSN (ref); > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > + > > + rtx set =3D single_set (insn); > > + if (!set) > > + continue; > > + > > + rtx src =3D SET_SRC (set); > > + if (MEM_P (src)) > > + continue; > > + > > + rtx dest =3D SET_DEST (set); > > + if (!REG_P (dest)) > > + continue; > > + > > + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) > > + continue; > > + > > + /* Add this register to stack_slot_access. */ > > + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); > > + } > > +} > > + > > /* Set stack_frame_required to false if stack frame isn't required. > > Update STACK_ALIGNMENT to the largest alignment, in bits, of stack > > slot used if stack frame is required and CHECK_STACK_SLOT is true. = */ > > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int= &stack_alignment, > > add_to_hard_reg_set (&set_up_by_prologue, Pmode, > > HARD_FRAME_POINTER_REGNUM); > > > > - /* The preferred stack alignment is the minimum stack alignment. */ > > - if (stack_alignment > crtl->preferred_stack_boundary) > > - stack_alignment =3D crtl->preferred_stack_boundary; > > - > > bool require_stack_frame =3D false; > > > > FOR_EACH_BB_FN (bb, cfun) > > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned in= t &stack_alignment, > > set_up_by_prologue)) > > { > > require_stack_frame =3D true; > > - > > - if (check_stack_slot) > > - { > > - /* Find the maximum stack alignment. */ > > - subrtx_iterator::array_type array; > > - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) > > - if (MEM_P (*iter) > > - && (reg_mentioned_p (stack_pointer_rtx, > > - *iter) > > - || reg_mentioned_p (frame_pointer_rtx, > > - *iter))) > > - { > > - unsigned int alignment =3D MEM_ALIGN (*iter); > > - if (alignment > stack_alignment) > > - stack_alignment =3D alignment; > > - } > > - } > > + break; > > } > > } > > > > cfun->machine->stack_frame_required =3D require_stack_frame; > > + > > + /* Stop if we don't need to check stack slot. */ > > + if (!check_stack_slot) > > + return; > > + > > + /* The preferred stack alignment is the minimum stack alignment. */ > > + if (stack_alignment > crtl->preferred_stack_boundary) > > + stack_alignment =3D crtl->preferred_stack_boundary; > > + > > + HARD_REG_SET stack_slot_access; > > + CLEAR_HARD_REG_SET (stack_slot_access); > > + > > + /* Stack slot can be accessed by stack pointer, frame pointer or > > + registers defined by stack pointer or frame pointer. */ > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + STACK_POINTER_REGNUM); > > I'm possibly missing the point, but wasn't the purpose of the change to d= etect > "indirect" uses of the stack pointer? This now handles two levels, thus > *sp and tem =3D sp + 4; *tem, but it doesn't seem to handle tem2 =3D tem = + 4; > *tem2? The previous patch did I think, but its compile-time complexity w= asn't > sound. > > I think the ix86_find_all_reg_use wants to wrap itself inside sth like > > auto_bitmap worklist; > > bitmap_set_bit (worklist, reg); > do > { > int reg =3D bitmap_clear_first_set_bit (worklist); > /* it's current processing of reg */ > when it adds to stack_slot_access also add to worklist > } > while (!bitmap_empty_p (worklist)); Will fix. > > + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); > > + if (frame_pointer_needed) > > + { > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + HARD_FRAME_POINTER_REGNUM); > > + ix86_find_all_reg_use (stack_slot_access, > > + HARD_FRAME_POINTER_REGNUM); > > + } > > + > > + for (int i =3D 0; i < FIRST_PSEUDO_REGISTER; i++) > > + if (GENERAL_REGNO_P (i) > > + && TEST_HARD_REG_BIT (stack_slot_access, i)) > > You can as well iterate over the stack_slot_access regset here? Will fix. Thanks. > > + for (df_ref ref =3D DF_REG_USE_CHAIN (i); > > + ref !=3D NULL; > > + ref =3D DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn =3D DF_REF_INSN (ref); > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > + note_stores (insn, ix86_update_stack_alignment, > > + &stack_alignment); > > + } > > } > > > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite= /g++.target/i386/pr109780-1.C > > new file mode 100644 > > index 00000000000..7e3eabdec94 > > --- /dev/null > > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C > > @@ -0,0 +1,72 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target c++17 } */ > > +/* { dg-options "-O2 -mavx2 -mtune=3Dhaswell" } */ > > + > > +template struct remove_reference { > > + using type =3D __remove_reference(_Tp); > > +}; > > +template struct MaybeStorageBase { > > + T val; > > + struct Union { > > + ~Union(); > > + } mStorage; > > +}; > > +template struct MaybeStorage : MaybeStorageBase { > > + char mIsSome; > > +}; > > +template ::typ= e> > > +constexpr MaybeStorage Some(T &&); > > +template constexpr MaybeStorage Some(T &&a= Value) { > > + return {aValue}; > > +} > > +template struct Span { > > + int operator[](long idx) { > > + int *__trans_tmp_4; > > + if (__builtin_expect(idx, 0)) > > + *(int *)__null =3D false; > > + __trans_tmp_4 =3D storage_.data(); > > + return __trans_tmp_4[idx]; > > + } > > + struct { > > + int *data() { return data_; } > > + int *data_; > > + } storage_; > > +}; > > +struct Variant { > > + template Variant(RefT) {} > > +}; > > +long from_i, from___trans_tmp_9; > > +namespace js::intl { > > +struct DecimalNumber { > > + Variant string_; > > + unsigned long significandStart_; > > + unsigned long significandEnd_; > > + bool zero_ =3D false; > > + bool negative_; > > + template DecimalNumber(CharT string) : string_(stri= ng) {} > > + template > > + static MaybeStorage from(Span); > > + void from(); > > +}; > > +} // namespace js::intl > > +void js::intl::DecimalNumber::from() { > > + Span __trans_tmp_3; > > + from(__trans_tmp_3); > > +} > > +template > > +MaybeStorage > > +js::intl::DecimalNumber::from(Span chars) { > > + DecimalNumber number(chars); > > + if (auto ch =3D chars[from_i]) { > > + from_i++; > > + number.negative_ =3D ch =3D=3D '-'; > > + } > > + while (from___trans_tmp_9 && chars[from_i]) > > + ; > > + if (chars[from_i]) > > + while (chars[from_i - 1]) > > + number.zero_ =3D true; > > + return Some(number); > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp= " } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite= /gcc.target/i386/pr109780-1.c > > new file mode 100644 > > index 00000000000..6b06947f2a5 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c > > @@ -0,0 +1,14 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=3Dskylake" } */ > > + > > +char perm[64]; > > + > > +void > > +__attribute__((noipa)) > > +foo (int n) > > +{ > > + for (int i =3D 0; i < n; ++i) > > + perm[i] =3D i; > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp= " } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite= /gcc.target/i386/pr109780-2.c > > new file mode 100644 > > index 00000000000..152da06c6ad > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c > > @@ -0,0 +1,21 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=3Dskylake" } */ > > + > > +#define N 9 > > + > > +void > > +f (double x, double y, double *res) > > +{ > > + y =3D -y; > > + for (int i =3D 0; i < N; ++i) > > + { > > + double tmp =3D y; > > + y =3D x; > > + x =3D tmp; > > + res[i] =3D i; > > + } > > + res[N] =3D y * y; > > + res[N + 1] =3D x; > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp= " } } */ > > -- > > 2.41.0 > > --=20 H.J.