From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2100) id AAEB9387087B; Sat, 22 Aug 2020 23:29:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AAEB9387087B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1598138945; bh=9gU7kS0RRZ8ZAHMY0o4Q6D7uDCDHbQeMbykodD/ES/g=; h=From:To:Subject:Date:From; b=uYaYP1aCLWAazFGF5Pl3rQFGuqtHSeJV69PyjwHRsAVrEGf0iGtd3m7wWTm4AnIwd UENw88SJZkx34pBkJzMS9kW6EzyJfl+KuJIb6+0RV0UVfHK203QLtP3Ge3zfYUvwxb g1Csry+muFSY7g6eqbJ9rVil3hBS2an1GPEKYdwk= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Giuliano Belinassi To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/autopar_devel] x86: Enable FMA in rsqrt<mode>2 expander X-Act-Checkin: gcc X-Git-Author: H.J. Lu X-Git-Refname: refs/heads/devel/autopar_devel X-Git-Oldrev: a014f47b3efc284e5e638042f81718cdc2c11ce5 X-Git-Newrev: 7d54540883078fd9faabdab9d39df8f3e5714f69 Message-Id: <20200822232905.AAEB9387087B@sourceware.org> Date: Sat, 22 Aug 2020 23:29:05 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 22 Aug 2020 23:29:05 -0000 https://gcc.gnu.org/g:7d54540883078fd9faabdab9d39df8f3e5714f69 commit 7d54540883078fd9faabdab9d39df8f3e5714f69 Author: H.J. Lu Date: Wed Jan 23 06:33:58 2019 -0800 x86: Enable FMA in rsqrt<mode>2 expander Enable FMA in rsqrt<mode>2 expander and fold rsqrtv16sf2 expander into rsqrt<mode>2 expander which expands to UNSPEC_RSQRT28 for TARGET_AVX512ER. Although it doesn't show performance change in our workloads, FMA can improve other workloads. gcc/ PR target/88713 * config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA. * config/i386/sse.md (VF_AVX512VL_VF1_128_256): New. (rsqrt<mode>2): Replace VF1_128_256 with VF_AVX512VL_VF1_128_256. (rsqrtv16sf2): Removed. 
gcc/testsuite/ PR target/88713 * gcc.target/i386/pr88713-1.c: New test. * gcc.target/i386/pr88713-2.c: Likewise. Diff: --- gcc/config/i386/i386-expand.c | 18 +++++++++++++----- gcc/config/i386/sse.md | 24 ++++++++++-------------- gcc/testsuite/gcc.target/i386/pr88713-1.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pr88713-2.c | 6 ++++++ 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index d81dd73f034..49718b7a41c 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -15535,14 +15535,22 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) } } + mthree = force_reg (mode, mthree); + /* e0 = x0 * a */ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); - /* e1 = e0 * x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); - /* e2 = e1 - 3. */ - mthree = force_reg (mode, mthree); - emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); + if (TARGET_FMA || TARGET_AVX512F) + emit_insn (gen_rtx_SET (e2, + gen_rtx_FMA (mode, e0, x0, mthree))); + else + { + /* e1 = e0 * x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); + + /* e2 = e1 - 3. 
*/ + emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); + } mhalf = force_reg (mode, mhalf); if (recip) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 431571a4bc1..d3ad5833e1f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -326,6 +326,12 @@ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +;; AVX512VL SF/DF plus 128- and 256-bit SF vector modes +(define_mode_iterator VF_AVX512VL_VF1_128_256 + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") + (V2DF "TARGET_AVX512VL")]) + (define_mode_iterator VF2_AVX512VL [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) @@ -2070,26 +2076,16 @@ (set_attr "mode" "")]) (define_expand "rsqrt<mode>2" - [(set (match_operand:VF1_128_256 0 "register_operand") - (unspec:VF1_128_256 - [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))] + [(set (match_operand:VF_AVX512VL_VF1_128_256 0 "register_operand") + (unspec:VF_AVX512VL_VF1_128_256 + [(match_operand:VF_AVX512VL_VF1_128_256 1 "vector_operand")] + UNSPEC_RSQRT))] "TARGET_SSE && TARGET_SSE_MATH" { ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true); DONE; }) -(define_expand "rsqrtv16sf2" - [(set (match_operand:V16SF 0 "register_operand") - (unspec:V16SF - [(match_operand:V16SF 1 "vector_operand")] - UNSPEC_RSQRT28))] - "TARGET_AVX512ER && TARGET_SSE_MATH" -{ - ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true); - DONE; -}) - (define_insn "<sse>_rsqrt<mode>2" [(set (match_operand:VF1_128_256 0 "register_operand" "=x") (unspec:VF1_128_256 diff --git a/gcc/testsuite/gcc.target/i386/pr88713-1.c b/gcc/testsuite/gcc.target/i386/pr88713-1.c new file mode 100644 index 00000000000..26a0da57a41 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88713-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mno-avx512f -mfma" } */ + +extern float sqrtf (float); + +void +rsqrt (float* restrict 
r, float* restrict a) +{ + for (int i = 0; i < 64; i++) + r[i] = sqrtf(a[i]); +} + +/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr88713-2.c b/gcc/testsuite/gcc.target/i386/pr88713-2.c new file mode 100644 index 00000000000..8b55dfcf924 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88713-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=skylake-avx512 -mno-fma" } */ + +#include "pr88713-1.c" + +/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */