[gcc r14-8394] AArch64: Fix expansion of Advanced SIMD div and mul using SVE [PR109636]

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r14-8394] AArch64: Fix expansion of Advanced SIMD div and mul using SVE [PR109636]
@ 2024-01-24 15:58 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2024-01-24 15:58 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:dfa17fd3b1a50cab51803e8a63c5c7b7db173523

commit r14-8394-gdfa17fd3b1a50cab51803e8a63c5c7b7db173523
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Jan 24 15:58:34 2024 +0000

    AArch64: Fix expansion of Advanced SIMD div and mul using SVE [PR109636]
    
    As suggested in the ticket, this replaces the old expansion, which converted
    the Advanced SIMD types to SVE types via subregs, with patterns that simply
    print out an SVE register for these instructions.
    
    This fixes the subreg issues since there are no subregs involved anymore.
    
    gcc/ChangeLog:
    
            PR target/109636
            * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3,
            mulv2di3): Remove.
            * config/aarch64/iterators.md (VQDIV): Remove.
            (SVE_FULL_SDI_SIMD, SVE_FULL_HSDI_SIMD_DI,
            SVE_I_SIMD_DI): New.
            (VPRED, sve_lane_con): Add V4SI and V2DI.
            * config/aarch64/aarch64-sve.md (<optab><mode>3,
            @aarch64_pred_<optab><mode>): Support Advanced SIMD types.
            (mul<mode>3): New, split from <optab><mode>3.
            (@aarch64_pred_<optab><mode>, *post_ra_<optab><mode>3): New.
            * config/aarch64/aarch64-sve2.md (@aarch64_mul_lane_<mode>,
            *aarch64_mul_unpredicated_<mode>): Change SVE_FULL_HSDI to
            SVE_FULL_HSDI_SIMD_DI.
    
    gcc/testsuite/ChangeLog:
    
            PR target/109636
            * gcc.target/aarch64/sve/pr109636_1.c: New test.
            * gcc.target/aarch64/sve/pr109636_2.c: New test.
            * gcc.target/aarch64/sve2/pr109636_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 | 41 -----------
 gcc/config/aarch64/aarch64-sve.md                  | 80 ++++++++++++++--------
 gcc/config/aarch64/aarch64-sve2.md                 | 26 +++----
 gcc/config/aarch64/iterators.md                    | 19 +++--
 gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c  | 13 ++++
 gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c  | 13 ++++
 gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c | 13 ++++
 7 files changed, 118 insertions(+), 87 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6f48b4d5f21..556d0cf359f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -389,26 +389,6 @@
   [(set_attr "type" "neon_mul_<Vetype><q>")]
 )
 
-;; Advanced SIMD does not support vector DImode MUL, but SVE does.
-;; Make use of the overlap between Z and V registers to implement the V2DI
-;; optab for TARGET_SVE.  The mulvnx2di3 expander can
-;; handle the TARGET_SVE2 case transparently.
-(define_expand "mulv2di3"
-  [(set (match_operand:V2DI 0 "register_operand")
-        (mult:V2DI (match_operand:V2DI 1 "register_operand")
-		   (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
-  "TARGET_SVE"
-  {
-    machine_mode sve_mode = VNx2DImode;
-    rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
-    rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
-    rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);
-
-    emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
-    DONE;
-  }
-)
-
 (define_insn "bswap<mode>2"
   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
@@ -2678,27 +2658,6 @@
   [(set_attr "type" "neon_fp_div_<stype><q>")]
 )
 
-;; SVE has vector integer divisions, unlike Advanced SIMD.
-;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
-;; optabs to the midend.
-(define_expand "<su_optab>div<mode>3"
-  [(set (match_operand:VQDIV 0 "register_operand")
-	(ANY_DIV:VQDIV
-	  (match_operand:VQDIV 1 "register_operand")
-	  (match_operand:VQDIV 2 "register_operand")))]
-  "TARGET_SVE"
-  {
-    machine_mode sve_mode
-      = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
-    rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
-    rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
-    rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
-
-    emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
-    DONE;
-  }
-)
-
 (define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index e1e3c1bd0b7..eca8623e587 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3789,16 +3789,35 @@
   [(set (match_operand:SVE_I 0 "register_operand")
 	(unspec:SVE_I
 	  [(match_dup 3)
-	   (SVE_INT_BINARY_IMM:SVE_I
+	   (SVE_INT_BINARY_MULTI:SVE_I
 	     (match_operand:SVE_I 1 "register_operand")
 	     (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
+  {
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+  }
+)
+
+;; Unpredicated integer binary operations that have an immediate form.
+;; Advanced SIMD does not support vector DImode MUL, but SVE does.
+;; Make use of the overlap between Z and V registers to implement the V2DI
+;; optab for TARGET_SVE.  The mulvnx2di3 expander can
+;; handle the TARGET_SVE2 case transparently.
+(define_expand "mul<mode>3"
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+	(unspec:SVE_I_SIMD_DI
+	  [(match_dup 3)
+	   (mult:SVE_I_SIMD_DI
+	     (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+	     (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE"
   {
     /* SVE2 supports the MUL (vectors, unpredicated) form.  Emit the simple
        pattern for it here rather than splitting off the MULT expander
        separately.  */
-    if (TARGET_SVE2 && <CODE> == MULT)
+    if (TARGET_SVE2)
       {
 	emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
 						   operands[1], operands[2]));
@@ -3814,26 +3833,26 @@
 ;; and would make the instruction seem less uniform to the register
 ;; allocator.
 (define_insn_and_split "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_I 0 "register_operand")
-	(unspec:SVE_I
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+	(unspec:SVE_I_SIMD_DI
 	  [(match_operand:<VPRED> 1 "register_operand")
-	   (SVE_INT_BINARY_IMM:SVE_I
-	     (match_operand:SVE_I 2 "register_operand")
-	     (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand"))]
+	   (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+	     (match_operand:SVE_I_SIMD_DI 2 "register_operand")
+	     (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2  , 3             ; attrs: movprfx ]
      [ w        , Upl , %0 , <sve_imm_con> ; *              ] #
-     [ w        , Upl , 0  , w             ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ w        , Upl , 0  , w             ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
      [ ?&w      , Upl , w  , <sve_imm_con> ; yes            ] #
-     [ ?&w      , Upl , w  , w             ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ ?&w      , Upl , w  , w             ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
   }
   ; Split the unpredicated form after reload, so that we don't have
   ; the unnecessary PTRUE.
   "&& reload_completed
    && !register_operand (operands[3], <MODE>mode)"
   [(set (match_dup 0)
-	(SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
+	(SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
   ""
 )
 
@@ -3841,14 +3860,14 @@
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_<optab><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
-	(SVE_INT_BINARY_IMM:SVE_I
-	  (match_operand:SVE_I 1 "register_operand" "0, w")
-	  (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
+	(SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+	  (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
+	  (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
   "TARGET_SVE && reload_completed"
   "@
-   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
-   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
+   <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
+   movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
   [(set_attr "movprfx" "*,yes")]
 )
 
@@ -4458,13 +4477,16 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated integer division.
+;; SVE has vector integer divisions, unlike Advanced SIMD.
+;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
+;; optabs to the midend.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-	(unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+	(unspec:SVE_FULL_SDI_SIMD
 	  [(match_dup 3)
-	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
-	     (match_operand:SVE_FULL_SDI 1 "register_operand")
-	     (match_operand:SVE_FULL_SDI 2 "register_operand"))]
+	   (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+	     (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
+	     (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
@@ -4474,18 +4496,18 @@
 
 ;; Integer division predicated with a PTRUE.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-	(unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+	(unspec:SVE_FULL_SDI_SIMD
 	  [(match_operand:<VPRED> 1 "register_operand")
-	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
-	     (match_operand:SVE_FULL_SDI 2 "register_operand")
-	     (match_operand:SVE_FULL_SDI 3 "register_operand"))]
+	   (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+	     (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
+	     (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
-     [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
-     [ w        , Upl , w , 0 ; *              ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
-     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
+     [ w        , Upl , w , 0 ; *              ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
+     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
   }
 )
 
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 1d1eb8bfdff..934e57055d3 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -615,29 +615,29 @@
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_mul_lane_<mode>"
-  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
-	(mult:SVE_FULL_HSDI
-	  (unspec:SVE_FULL_HSDI
-	    [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+  [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
+	(mult:SVE_FULL_HSDI_SIMD_DI
+	  (unspec:SVE_FULL_HSDI_SIMD_DI
+	    [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "<sve_lane_con>")
 	     (match_operand:SI 3 "const_int_operand")]
 	    UNSPEC_SVE_LANE_SELECT)
-	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
+	  (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
   "TARGET_SVE2"
-  "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+  "mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>[%3]"
 )
 
 ;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
 ;; we include them here to allow matching simpler, unpredicated RTL.
 (define_insn "*aarch64_mul_unpredicated_<mode>"
-  [(set (match_operand:SVE_I 0 "register_operand")
-	(mult:SVE_I
-	  (match_operand:SVE_I 1 "register_operand")
-	  (match_operand:SVE_I 2 "aarch64_sve_vsm_operand")))]
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+	(mult:SVE_I_SIMD_DI
+	  (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+	  (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
   "TARGET_SVE2"
   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
-     [ w        , w , w   ; *              ] mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
-     [ w        , 0 , vsm ; *              ] mul\t%0.<Vetype>, %0.<Vetype>, #%2
-     [ ?&w      , w , vsm ; yes            ] movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2
+     [ w        , w , w   ; *              ] mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>
+     [ w        , 0 , vsm ; *              ] mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
+     [ ?&w      , w , vsm ; yes            ] movprfx\t%Z0, %Z1\;mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
   }
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 942270e99d6..99cde46f1ba 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -108,9 +108,6 @@
 ;; Copy of the above.
 (define_mode_iterator DREG2 [DREG])
 
-;; Advanced SIMD modes for integer divides.
-(define_mode_iterator VQDIV [V4SI V2DI])
-
 ;; All modes suitable to store/load pair (2 elements) using STP/LDP.
 (define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF])
 
@@ -471,6 +468,10 @@
 ;; elements.
 (define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI])
 
+;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit
+;; elements and Advanced SIMD Fully-packed 64-bit elements.
+(define_mode_iterator SVE_FULL_HSDI_SIMD_DI [SVE_FULL_HSDI V2DI])
+
 ;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit
 ;; elements.
 (define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
@@ -488,6 +489,10 @@
 ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
 
+;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
+;; 64-bit elements.
+(define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])
+
 ;; 2x and 4x tuples of the above, excluding 2x DI.
 (define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
 
@@ -550,6 +555,10 @@
 			     VNx4SI VNx2SI
 			     VNx2DI])
 
+;; All SVE integer vector modes and Advanced SIMD 64-bit vector
+;; element modes
+(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])
+
 ;; SVE integer vector modes whose elements are 16 bits or wider.
 (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
 				VNx4SI VNx2SI
@@ -2268,7 +2277,8 @@
 			 (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
 			 (VNx32BF "VNx8BI")
 			 (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
-			 (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
+			 (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
+			 (V4SI "VNx4BI") (V2DI "VNx2BI")])
 
 ;; ...and again in lower case.
 (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
@@ -2370,6 +2380,7 @@
 
 ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
 (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
+							  (V2DI "x")
 				(VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
 
 ;; The constraint to use for an SVE FCMLA lane index.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
new file mode 100644
index 00000000000..5b37ddd2770
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foo (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
+}
+
+/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
new file mode 100644
index 00000000000..6d39dc8e590
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mcpu=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
new file mode 100644
index 00000000000..2bea18ad703
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-01-24 15:58 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-24 15:58 [gcc r14-8394] AArch64: Fix expansion of Advanced SIMD div and mul using SVE [PR109636] Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).