From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pf1-x432.google.com (mail-pf1-x432.google.com [IPv6:2607:f8b0:4864:20::432]) by sourceware.org (Postfix) with ESMTPS id C2297385E00A for ; Fri, 7 Jul 2023 15:13:39 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org C2297385E00A Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-pf1-x432.google.com with SMTP id d2e1a72fcca58-666ecf9a081so1588466b3a.2 for ; Fri, 07 Jul 2023 08:13:39 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1688742818; x=1691334818; h=content-transfer-encoding:mime-version:message-id:date:subject:to :from:from:to:cc:subject:date:message-id:reply-to; bh=J32hDmH8ZOVcc37Md6ocznk8/JD0ZoLTtzEeh2F2mJU=; b=pNvDG0Gb2cUKTmRB34/BBy9EGcAa9LyIjpGY/WsssCfbTLG7dKo6m0f/GWb4sthIWo fAM1uoC6fU1jOLqMwcivrk0kF6kMteefbyCQHS+YfvWfLD2tF0Q01bFyhML/9Kqw60Qb aIaed5Jp5TeQrxZoFgrS1GYXLdnglfkNO1RMlhMOEF0qtqS9pPOJ64yOcCBPITBA09pC LzYc/ADOmt9xfLK+jn4IYjP/+pbpFv8WfrXozdTPCM7jSWOhGydQ5Ubva4ywu21KU2cH cW/7F0jq+8DscyA/qAmK5Gl1cNjl/aSxVhTjxpFbNeWL+CxFPLFU0BTK4kPDk8+6/P2V ms9A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1688742818; x=1691334818; h=content-transfer-encoding:mime-version:message-id:date:subject:to :from:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=J32hDmH8ZOVcc37Md6ocznk8/JD0ZoLTtzEeh2F2mJU=; b=KqYn86Ag7amXz6onubmLRtVc5+xLjy/K1E2Osnhc2lxO2ffH6Xi9foIDOHrDuX0Okp XKcDe54DZFK+eOFewQAU2ho/8ezQPCRKnjJyCGMlVkz1Vb8kuv9+LbCzXRdCubNVnHhd HLSVmZE+vfapeLXduDQGfS/doXSxEuZRRAQ3IK1sSEVapZGy/FWowCYJe87Dlm8WBGSn QYFZRd+iYjpvJ1j8jBSFeBboXu1Hud0N/jSBHMZLKFu7Lsa6qFJBJkLpDdyZp/wa+SBS s8hWUix3N8HPHgqx5MuLsLz9j3R29TiLTRgGPKONJlkeFEqfEesprYKVcdEaAWY1I/vX 5zig== X-Gm-Message-State: ABy/qLbRWNlOAQ/k8AEWYpc1zzJoyyCJy228ah0CIZGbwjNxDkwgmsoY UqwvWUiGVr0OVKh5XlwyBBFTG3tEqjY= X-Google-Smtp-Source: APBJJlFN+ME4E7Kh1UKikD0JrjaCFXifWhbDeyz+dBB3d/+bO6fgRwnNAwhGE0tDRmNTCLDiTcITgQ== X-Received: by 2002:aa7:88c6:0:b0:662:5146:c761 with SMTP id k6-20020aa788c6000000b006625146c761mr6172290pff.17.1688742818197; Fri, 07 Jul 2023 08:13:38 -0700 (PDT) Received: from gnu-cfl-3.localdomain ([172.59.160.24]) by smtp.gmail.com with ESMTPSA id q16-20020a62e110000000b00682a27905b9sm3124705pfh.13.2023.07.07.08.13.37 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 07 Jul 2023 08:13:37 -0700 (PDT) Received: from gnu-cfl-3.. (localhost [IPv6:::1]) by gnu-cfl-3.localdomain (Postfix) with ESMTP id A062E740164 for ; Fri, 7 Jul 2023 08:13:36 -0700 (PDT) From: "H.J. Lu" To: gcc-patches@gcc.gnu.org Subject: [PATCH v2] x86: Properly find the maximum stack slot alignment Date: Fri, 7 Jul 2023 08:13:36 -0700 Message-ID: <20230707151336.691534-1-hjl.tools@gmail.com> X-Mailer: git-send-email 2.41.0 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-3025.3 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Don't assume that stack slots can only be accessed by stack or frame registers. We first find all registers defined by stack or frame registers. Then check memory accesses by such registers, including stack and frame registers. gcc/ PR target/109780 * config/i386/i386.cc (ix86_update_stack_alignment): New. (ix86_find_all_reg_use): Likewise. (ix86_find_max_used_stack_alignment): Also check memory accesses from registers defined by stack or frame registers. gcc/testsuite/ PR target/109780 * g++.target/i386/pr109780-1.C: New test. * gcc.target/i386/pr109780-1.c: Likewise. * gcc.target/i386/pr109780-2.c: Likewise. --- gcc/config/i386/i386.cc | 120 +++++++++++++++++---- gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++++++ gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++ gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++ 4 files changed, 206 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index caca74d6dec..27f349b0ccb 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Update the maximum stack slot alignment from memory alignment in + PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment. */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + if (MEM_P (*iter)) + { + unsigned int alignment = MEM_ALIGN (*iter); + unsigned int *stack_alignment + = (unsigned int *) data; + if (alignment > *stack_alignment) + *stack_alignment = alignment; + break; + } +} + +/* Find all registers defined with REG. */ + +static void +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) +{ + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + if (!NONDEBUG_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + rtx src = SET_SRC (set); + if (MEM_P (src)) + continue; + + rtx dest = SET_DEST (set); + if (!REG_P (dest)) + continue; + + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) + continue; + + /* Add this register to stack_slot_access. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. */ @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, + STACK_POINTER_REGNUM); + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + ix86_find_all_reg_use (stack_slot_access, + HARD_FRAME_POINTER_REGNUM); + } + + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) + && TEST_HARD_REG_BIT (stack_slot_access, i)) + for (df_ref ref = DF_REG_USE_CHAIN (i); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + if (!NONDEBUG_INSN_P (insn)) + continue; + note_stores (insn, ix86_update_stack_alignment, + &stack_alignment); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C new file mode 100644 index 00000000000..7e3eabdec94 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target c++17 } */ +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */ + +template struct remove_reference { + using type = __remove_reference(_Tp); +}; +template struct MaybeStorageBase { + T val; + struct Union { + ~Union(); + } mStorage; +}; +template struct MaybeStorage : MaybeStorageBase { + char mIsSome; +}; +template ::type> +constexpr MaybeStorage Some(T &&); +template constexpr MaybeStorage Some(T &&aValue) { + return {aValue}; +} +template struct Span { + int operator[](long idx) { + int *__trans_tmp_4; + if (__builtin_expect(idx, 0)) + *(int *)__null = false; + __trans_tmp_4 = storage_.data(); + return __trans_tmp_4[idx]; + } + struct { + int *data() { return data_; } + int *data_; + } storage_; +}; +struct Variant { + template Variant(RefT) {} +}; +long from_i, from___trans_tmp_9; +namespace js::intl { +struct DecimalNumber { + Variant string_; + unsigned long significandStart_; + unsigned long significandEnd_; + bool zero_ = false; + bool negative_; + template DecimalNumber(CharT string) : string_(string) {} + template + static MaybeStorage from(Span); + void from(); +}; +} // namespace js::intl +void js::intl::DecimalNumber::from() { + Span __trans_tmp_3; + from(__trans_tmp_3); +} +template +MaybeStorage +js::intl::DecimalNumber::from(Span chars) { + DecimalNumber number(chars); + if (auto ch = chars[from_i]) { + from_i++; + number.negative_ = ch == '-'; + } + while (from___trans_tmp_9 && chars[from_i]) + ; + if (chars[from_i]) + while (chars[from_i - 1]) + number.zero_ = true; + return Some(number); +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c new file mode 100644 index 00000000000..6b06947f2a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +char perm[64]; + +void +__attribute__((noipa)) +foo (int n) +{ + for (int i = 0; i < n; ++i) + perm[i] = i; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c new file mode 100644 index 00000000000..152da06c6ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +#define N 9 + +void +f (double x, double y, double *res) +{ + y = -y; + for (int i = 0; i < N; ++i) + { + double tmp = y; + y = x; + x = tmp; + res[i] = i; + } + res[N] = y * y; + res[N + 1] = x; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ -- 2.41.0