From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 4318C3858D38; Mon, 23 Jan 2023 14:18:09 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4318C3858D38 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1674483489; bh=1tUwSfSwc8C37oXaPTa1Cr+m+fEkZQYZmhAGh8JoppI=; h=From:To:Subject:Date:From; b=LWQydS3CjggKSwSvI+uuTJc1Mgl9EoQ5z+FOQhV2MzhN4XRhdA7CmmzWabo3sQ7Em XBygLYwQ5QfRck896dUzNnX7ppUfMei6HnPxqKebDsPCj88Uewx6XCPUq8BqqR+vz3 DGcgDeiL07s0r2TJgEpgykvQ6YtYmg+mDExV2i7w= From: "kungfujesus06 at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug c/108498] New: ppc64 big endian generates uninitialized reads with -fstore-merging Date: Mon, 23 Jan 2023 14:18:08 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: c X-Bugzilla-Version: 12.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: kungfujesus06 at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status bug_severity priority component assigned_to reporter target_milestone Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D108498 Bug ID: 108498 Summary: ppc64 big endian generates uninitialized reads with -fstore-merging Product: gcc Version: 12.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: kungfujesus06 at gmail dot com Target Milestone: --- It seems that populating a bit field of many with store merging enabled produces an access to uninitialized memory. 
Here's the minimal reproducer: ``` #include #include #define NVFX_FP_MASK_X 1 #define NVFX_FP_MASK_Y 2 #define NVFX_FP_MASK_Z 4 #define NVFX_FP_MASK_W 8 #define NVFX_FP_MASK_ALL 0xf #define NV40_FP_OP_OUT_NONE (1U << 30) #define NVFX_FP_OP_OPCODE_MUL 0x02 #define NVFX_COND_TR 7 #define NVFXSR_NONE 0 #define arith(s,o,d,m,s0,s1,s2) \ nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ (d), (m), (s0), (s1), (s2)) struct nvfx_reg { int8_t type; int32_t index; }; struct nvfx_src { struct nvfx_reg reg; uint8_t indirect : 1; uint8_t indirect_reg : 1; uint8_t indirect_swz : 2; uint8_t negate : 1; uint8_t abs : 1; uint8_t swz[4]; }; struct nvfx_insn { uint8_t op; char scale; int8_t unit; uint8_t mask; uint8_t cc_swz[4]; uint8_t sat : 1; uint8_t cc_update : 1; uint8_t cc_update_reg : 1; uint8_t cc_test : 3; uint8_t cc_test_reg : 1; struct nvfx_reg dst; struct nvfx_src src[3]; }; static inline struct nvfx_insn nvfx_insn(uint8_t sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2) { struct nvfx_insn insn =3D { .op =3D op, .scale =3D 0, .unit =3D unit, .sat =3D sat, .mask =3D mask, .cc_update =3D 0, .cc_update_reg =3D 0, .cc_test =3D NVFX_COND_TR, .cc_test_reg =3D 0, .cc_swz =3D { 0, 1, 2, 3 }, .dst =3D dst, .src =3D {s0, s1, s2} }; return insn; } static inline struct nvfx_reg nvfx_reg(int type, int index) { struct nvfx_reg temp =3D { .type =3D type, .index =3D index, }; return temp; } static inline struct nvfx_src nvfx_src(struct nvfx_reg reg) { struct nvfx_src temp =3D { .reg =3D reg, .abs =3D 0, .negate =3D 0, .swz =3D { 0, 1, 2, 3 }, .indirect =3D 0, }; return temp; } struct nvfx_insn emit_test(void) { const struct nvfx_src none =3D nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); struct nvfx_insn insn; struct nvfx_src src[2]; struct nvfx_reg tmp =3D {0, 1}; int mask, sat, unit =3D 0; int ai =3D -1, ci =3D -1, ii =3D -1; int i; src[0].reg.type =3D 0; src[0].reg.index =3D 2; src[1].reg.type =3D 4; src[1].reg.index =3D 8; 
return arith(0, MUL, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1= ], none); } int main(void) { struct nvfx_insn ins =3D emit_test(); printf("sat? =3D %d\n", ins.sat); } ``` This should print 0, but with -fstore-merging it often prints 1. Valgrind shows that it accesses uninitialized memory. The assembly listings with that optimization disabled and enabled are in the bug report filed here: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8134