From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1816) id 604C83857716; Tue, 25 Apr 2023 13:51:20 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 604C83857716 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682430680; bh=gKZI/UoGdS97PqZBDQdjz0PdHhDZk8Czqd94FI15CMU=; h=From:To:Subject:Date:From; b=NI4uyF8DUbvRFyyJHhCZeIA5sJFLFKWDQs/OCCryr28kHfr9a49/SSIxoVyvFN7k3 tIVDG487IR0uKbnJhwVnJ+RmY0hzFl7DVekHoivyMOyP/r0qyMKVXax5dZdMLsBWmc A3cHfnqL9n1pHzu2U0pz132KFnrghL2p1ztMB5WI= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kyrylo Tkachov To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-221] aarch64: Implement V2DI, V4SI division optabs for TARGET_SVE X-Act-Checkin: gcc X-Git-Author: Kyrylo Tkachov X-Git-Refname: refs/heads/master X-Git-Oldrev: 784e03f378bb2c330b96459928d0472d38748970 X-Git-Newrev: c69db3ef7f7d82a50f46038aa5457b7c8cc2d643 Message-Id: <20230425135120.604C83857716@sourceware.org> Date: Tue, 25 Apr 2023 13:51:20 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c69db3ef7f7d82a50f46038aa5457b7c8cc2d643 commit r14-221-gc69db3ef7f7d82a50f46038aa5457b7c8cc2d643 Author: Kyrylo Tkachov Date: Tue Apr 25 14:50:32 2023 +0100 aarch64: Implement V2DI,V4SI division optabs for TARGET_SVE Similar to the mulv2di case, we can use SVE instruction to implement the V4SI and V2DI optabs for signed and unsigned integer division. This allows us to generate much cleaner code for the testcase than the current: food: fmov x1, d1 fmov x0, d0 umov x2, v0.d[1] sdiv x0, x0, x1 umov x1, v1.d[1] sdiv x1, x2, x1 fmov d0, x0 ins v0.d[1], x1 ret which now becomes: food: ptrue p0.b, all sdiv z0.d, p0/m, z0.d, z1.d ret Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3): New define_expand. * config/aarch64/iterators.md (VQDIV): New mode iterator. (vnx2di): New mode attribute. 
gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve-neon-modes_3.c: New test. Diff: --- gcc/config/aarch64/aarch64-simd.md | 21 ++++++++ gcc/config/aarch64/iterators.md | 5 ++ .../gcc.target/aarch64/sve-neon-modes_3.c | 61 ++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index cfad812658f..5e1b4b18623 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2867,6 +2867,27 @@ [(set_attr "type" "neon_fp_div_<stype><q>")] ) +;; SVE has vector integer divisions, unlike Advanced SIMD. +;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI +;; optabs to the midend. +(define_expand "<su_optab>div<mode>3" + [(set (match_operand:VQDIV 0 "register_operand") + (ANY_DIV:VQDIV + (match_operand:VQDIV 1 "register_operand") + (match_operand:VQDIV 2 "register_operand")))] + "TARGET_SVE" + { + machine_mode sve_mode + = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require (); + rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0); + rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0); + rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0); + + emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2)); + DONE; + } +) + (define_insn "neg<mode>2" [(set (match_operand:VHSDF 0 "register_operand" "=w") (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 1d0b4822102..861753f677b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -108,6 +108,9 @@ ;; Copy of the above. (define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF]) +;; Advanced SIMD modes for integer divides. +(define_mode_iterator VQDIV [V4SI V2DI]) + ;; All modes suitable to store/load pair (2 elements) using STP/LDP. 
(define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF]) @@ -1421,6 +1424,8 @@ (VNx4SI "v4si") (VNx4SF "v4sf") (VNx2DI "v2di") (VNx2DF "v2df")]) +(define_mode_attr vnx [(V4SI "vnx4si") (V2DI "vnx2di")]) + ;; 64-bit container modes the inner or scalar source mode. (define_mode_attr VCOND [(HI "V4HI") (SI "V2SI") (V4HI "V4HI") (V8HI "V4HI") diff --git a/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c b/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c new file mode 100644 index 00000000000..f1e78a83a9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8.2-a+sve" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef long v2di __attribute__((vector_size (16))); +typedef unsigned long v2udi __attribute__((vector_size (16))); +typedef int v4si __attribute__((vector_size (16))); +typedef unsigned int v4usi __attribute__((vector_size (16))); + +/* +** food: +** ptrue p0.b, all +** sdiv z0.d, p0/m, z0.d, z1.d +** ret +*/ + +v2di +food (v2di a, v2di b) +{ + return a / b; +} + +/* +** fooud: +** ptrue p0.b, all +** udiv z0.d, p0/m, z0.d, z1.d +** ret +*/ + +v2udi +fooud (v2udi a, v2udi b) +{ + return a / b; +} + +/* +** foos: +** ptrue p0.b, all +** sdiv z0.s, p0/m, z0.s, z1.s +** ret +*/ + +v4si +foos (v4si a, v4si b) +{ + return a / b; +} + +/* +** foous: +** ptrue p0.b, all +** udiv z0.s, p0/m, z0.s, z1.s +** ret +*/ + +v4usi +foous (v4usi a, v4usi b) +{ + return a / b; +} +