public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][committed] aarch64: Implement V2DI,V4SI division optabs for TARGET_SVE
@ 2023-04-25 13:50 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-04-25 13:50 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 935 bytes --]

Hi all,

Similar to the mulv2di case, we can use SVE instructions to implement the V4SI and V2DI optabs
for signed and unsigned integer division.
This allows us to generate much cleaner code for the testcase than the current:
food:
        fmov    x1, d1
        fmov    x0, d0
        umov    x2, v0.d[1]
        sdiv    x0, x0, x1
        umov    x1, v1.d[1]
        sdiv    x1, x2, x1
        fmov    d0, x0
        ins     v0.d[1], x1
        ret
which now becomes:
food:
        ptrue   p0.b, all
        sdiv    z0.d, p0/m, z0.d, z1.d
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (<su_optab>div<mode>3): New define_expand.
	* config/aarch64/iterators.md (VQDIV): New mode iterator.
	(vnx): New mode attribute.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve-neon-modes_3.c: New test.

[-- Attachment #2: divsve.patch --]
[-- Type: application/octet-stream, Size: 3539 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 762a6b42d151ab9676493109a13fe24d6932fe36..d2c2ce763987a5841a5eed341aaf584f9ac94a51 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2910,6 +2910,27 @@ (define_insn "*div<mode>3"
   [(set_attr "type" "neon_fp_div_<stype><q>")]
 )
 
+;; SVE has vector integer divisions, unlike Advanced SIMD.
+;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
+;; optabs to the midend.
+(define_expand "<su_optab>div<mode>3"
+  [(set (match_operand:VQDIV 0 "register_operand")
+	(ANY_DIV:VQDIV
+	  (match_operand:VQDIV 1 "register_operand")
+	  (match_operand:VQDIV 2 "register_operand")))]
+  "TARGET_SVE"
+  {
+    machine_mode sve_mode
+      = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
+    rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
+    rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
+    rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
+
+    emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
+    DONE;
+  }
+)
+
 (define_insn "neg<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 886a5325e24a7c666475383573d0c75372420f4a..4ccd89782bab917f183c117ca85c154c841bc69e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -108,6 +108,9 @@ (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
 ;; Copy of the above.
 (define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF])
 
+;; Advanced SIMD modes for integer divides.
+(define_mode_iterator VQDIV [V4SI V2DI])
+
 ;; All modes suitable to store/load pair (2 elements) using STP/LDP.
 (define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF])
 
@@ -1412,6 +1415,8 @@ (define_mode_attr v128 [(VNx16QI "v16qi")
 			(VNx4SI  "v4si") (VNx4SF "v4sf")
 			(VNx2DI  "v2di") (VNx2DF "v2df")])
 
+(define_mode_attr vnx [(V4SI "vnx4si") (V2DI "vnx2di")])
+
 ;; 64-bit container modes the inner or scalar source mode.
 (define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
 			 (V4HI "V4HI") (V8HI "V4HI")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c b/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1e78a83a9c26aa9a197ab28a09134746eee5535
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve-neon-modes_3.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+sve" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef long v2di  __attribute__((vector_size (16)));
+typedef unsigned long v2udi  __attribute__((vector_size (16)));
+typedef int v4si  __attribute__((vector_size (16)));
+typedef unsigned int v4usi  __attribute__((vector_size (16)));
+
+/*
+** food:
+**	ptrue	p0.b, all
+**	sdiv	z0.d, p0/m, z0.d, z1.d
+**	ret
+*/
+
+v2di
+food (v2di a, v2di b)
+{
+  return a / b;
+}
+
+/*
+** fooud:
+**	ptrue	p0.b, all
+**	udiv	z0.d, p0/m, z0.d, z1.d
+**	ret
+*/
+
+v2udi
+fooud (v2udi a, v2udi b)
+{
+  return a / b;
+}
+
+/*
+** foos:
+**	ptrue	p0.b, all
+**	sdiv	z0.s, p0/m, z0.s, z1.s
+**	ret
+*/
+
+v4si
+foos (v4si a, v4si b)
+{
+  return a / b;
+}
+
+/*
+** foous:
+**	ptrue	p0.b, all
+**	udiv	z0.s, p0/m, z0.s, z1.s
+**	ret
+*/
+
+v4usi
+foous (v4usi a, v4usi b)
+{
+  return a / b;
+}
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-04-25 13:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-25 13:50 [PATCH][committed] aarch64: Implement V2DI,V4SI division optabs for TARGET_SVE Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).