From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7922) id 3460F3858D1E; Tue, 25 Apr 2023 10:44:57 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3460F3858D1E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682419497; bh=W8eZF1FQp9hsaTOpBqbY/ATjq2o3McTfrAy35OkWZqM=; h=From:To:Subject:Date:From; b=RvcR/PbHntSVqnk9AVxitbVJinCrCue9AYI4oYdqAEqUBlQ7ayAUK9YHp/jz9hgF3 a+ogVNMnfY4PJwJzdBkK5HmI2wei3C4VoyDYM/fz+C9opG9K9fyiMA6nENtqobY43T X+YIlVqr80KPSM6Stl78nTHMuqsVGOL88QDuyiks= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Victor Do Nascimento To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-215] aarch64: Leveraging the use of STP instruction for vec_duplicate X-Act-Checkin: gcc X-Git-Author: Victor Do Nascimento X-Git-Refname: refs/heads/master X-Git-Oldrev: a024ac7bca9b9de1d2e0c19d4bb11df293e27a7d X-Git-Newrev: 85279b0bddc1c5a7d181e2168e26ded354b21f32 Message-Id: <20230425104457.3460F3858D1E@sourceware.org> Date: Tue, 25 Apr 2023 10:44:57 +0000 (GMT) List-Id: https://gcc.gnu.org/g:85279b0bddc1c5a7d181e2168e26ded354b21f32 commit r14-215-g85279b0bddc1c5a7d181e2168e26ded354b21f32 Author: Victor Do Nascimento Date: Tue Apr 25 10:57:00 2023 +0100 aarch64: Leveraging the use of STP instruction for vec_duplicate The backend pattern for storing a pair of identical values in 32 and 64-bit modes with the machine instruction STP was missing, and multiple instructions were needed to reproduce this behavior as a result of failed RTL pattern match in combine pass. 
For the test case: typedef long long v2di __attribute__((vector_size (16))); typedef int v2si __attribute__((vector_size (8))); void foo (v2di *x, long long a) { v2di tmp = {a, a}; *x = tmp; } void foo2 (v2si *x, int a) { v2si tmp = {a, a}; *x = tmp; } at -O2 on aarch64 gives: foo: stp x1, x1, [x0] ret foo2: stp w1, w1, [x0] ret instead of: foo: dup v0.2d, x1 str q0, [x0] ret foo2: dup v0.2s, w1 str d0, [x0] ret Bootstrapped and regtested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-simd.md(aarch64_simd_stp<mode>): New. * config/aarch64/constraints.md: Make "Umn" relaxed memory constraint. * config/aarch64/iterators.md(ldpstp_vel_sz): New. gcc/testsuite/ * gcc.target/aarch64/stp_vec_dup_32_64-1.c: New. Diff: --- gcc/config/aarch64/aarch64-simd.md | 10 ++++ gcc/config/aarch64/constraints.md | 2 +- gcc/config/aarch64/iterators.md | 3 ++ .../gcc.target/aarch64/stp_vec_dup_32_64-1.c | 57 ++++++++++++++++++++++ 4 files changed, 71 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9f2fce6f033..cfad812658f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -257,6 +257,16 @@ [(set_attr "type" "neon_stp")] ) +(define_insn "aarch64_simd_stp<mode>" + [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn") + (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))] + "TARGET_SIMD" + "@ + stp\\t%<Vetype>1, %<Vetype>1, %y0 + stp\\t%<vw>1, %<vw>1, %y0" + [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")] +) + (define_insn "load_pair" [(set (match_operand:VQ 0 "register_operand" "=w") (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 5b20abc27e5..6df1dbec2a8 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -287,7 +287,7 @@ ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP ;; as a vector-concat. 
The address mode uses the same constraints as if it ;; were for a single value. -(define_memory_constraint "Umn" +(define_relaxed_memory_constraint "Umn" "@internal A memory address suitable for a load/store pair operation." (and (match_code "mem") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 13a7e89777d..1d0b4822102 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1020,6 +1020,9 @@ ;; Likewise for load/store pair. (define_mode_attr ldpstp_sz [(SI "8") (DI "16")]) +;; Size of element access for STP/LDP-generated vectors. +(define_mode_attr ldpstp_vel_sz [(V2SI "8") (V2SF "8") (V2DI "16") (V2DF "16")]) + ;; For inequal width int to float conversion (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c new file mode 100644 index 00000000000..fc2c1ea39e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef long long v2di __attribute__((vector_size (16))); +typedef int v2si __attribute__((vector_size (8))); + +#define TESTV2DI(lab, idx) \ + void \ + stpv2di_##lab (v2di *x, long long a) \ + { \ + v2di tmp = {a, a}; \ + x[idx] = tmp; \ + } + + +#define TESTV2SI(lab, idx) \ + void \ + stpv2si_##lab (v2si *x, int a) \ + { \ + v2si tmp = {a, a}; \ + x[idx] = tmp; \ + } \ + +/* Core test, no imm assembler offset: */ + +TESTV2SI(0, 0) +TESTV2DI(0, 0) +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+\]} } } */ +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+\]} } } */ + +/* Lower offset bounds: */ + +/* Valid offsets: */ +TESTV2SI(1, -32) +TESTV2DI(1, -32) +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -256\]} } } */ +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, 
\[x[0-9]+, -512\]} } } */ +/* Invalid offsets: */ +TESTV2SI(2, -33) +TESTV2DI(2, -33) +/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -264\]} } } */ +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -528\]} } } */ + +/* Upper offset bounds: */ + +/* Valid offsets: */ +TESTV2SI(3, 31) +TESTV2DI(3, 31) +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 248\]} } } */ +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 496\]} } } */ +/* Invalid offsets: */ +TESTV2SI(4, 32) +TESTV2DI(4, 32) +/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 256\]} } } */ +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 512\]} } } */ + +