public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346]
@ 2023-08-02 10:25 Tamar Christina
  2023-08-02 14:34 ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2023-08-02 10:25 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 11323 bytes --]

Hi All,

In GCC 11 we implemented the vectorizer optab for widening left shifts,
however this optab is only supported for uniform shift constants.

At the moment GCC still has two loop vectorization strategy (classical loop and
SLP based loop vec) and the optab is implemented as a scalar pattern.

This means that when we apply it to a non-uniform constant inside a loop we only
find out during SLP build that the constants aren't uniform.  At this point it's
too late and we lose SLP entirely.

Over the years I've tried various options but none of it works well:

1. Dissolving patterns during SLP built (problematic, also dissolves them for
non-slp).
2. Optionally ignoring patterns for SLP build (problematic, ends up interfearing
with relevancy detection).
3. Relaxing contraint on SLP build to allow non-constant values and dissolving
them after SLP build using an SLP pattern.  (problematic, ends up breaking
shift reassociation).

As a result we've concluded that for now this pattern should just be removed
and formed during RTL.

The plan is to move this to an SLP only pattern once we remove classical loop
vectorization support from GCC, at which time we can also properly support SVE's
Top and Bottom variants.

This removes the optab and reworks the RTL to recognize both the vector variant
and the intrinsics variant.  Also just simplifies all these patterns.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR target/106346
	* config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
	vec_widen_<sur>shiftl_hi_<mode>): Remove.
	(aarch64_<sur>shll<mode>_internal): Renamed to...
	(aarch64_<su>shll<mode>): .. This.
	(aarch64_<sur>shll2<mode>_internal): Renamed to...
	(aarch64_<su>shll2<mode>): .. This.
	(aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
	optabs.
	* config/aarch64/constraints.md (D2, D3): New.
	* config/aarch64/predicates.md (aarch64_simd_shift_imm_vec): New.

gcc/testsuite/ChangeLog:

	PR target/106346
	* gcc.target/aarch64/pr98772.c: Adjust assembly.
	* gcc.target/aarch64/vect-widen-shift.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d95394101470446e55f25a2397dd112239b6a54d..afd5b8632afbcddf8dad14495c3446c560eb085d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6387,105 +6387,66 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
-(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
-  "TARGET_SIMD"
-  {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
-    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
-						     p, operands[2]));
-    DONE;
-  }
-)
-
-(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
-   [(set (match_operand:<VWIDE> 0 "register_operand")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "immediate_operand" "i")]
-			  VSHLL))]
-   "TARGET_SIMD"
-   {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
-    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
-						      p, operands[2]));
-    DONE;
-   }
-)
-
 ;; vshll_n
 
-(define_insn "aarch64_<sur>shll<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
+(define_insn "aarch64_<su>shll<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			    (match_operand:VD_BHSI 1 "register_operand"))
+			 (match_operand:<VWIDE> 2
+			   "aarch64_simd_shift_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  {@ [cons: =0, 1, 2]
+     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
+     [w, w, D3] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll2<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+(define_expand "aarch64_<sur>shll_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
 			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-                         VSHLL))]
+;; vshll_high_n
+
+(define_insn "aarch64_<su>shll2<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
+			 (match_operand:<VWIDE> 3
+			   "aarch64_simd_shift_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
+  {@ [cons: =0, 1, 2, 3]
+     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
+     [w, w, , D3] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-;; vshll_high_n
-
-(define_insn "aarch64_<sur>shll2_n<mode>"
+(define_expand "aarch64_<sur>shll2_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
 			 (match_operand:SI 2 "immediate_operand" "i")]
-                         VSHLL))]
+			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
 ;; vrshr_n
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..07613013622deb8797255cbf10c265080066565b 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -468,6 +468,22 @@ (define_constraint "D1"
 			GET_MODE_UNIT_BITSIZE (mode) - 1,
 			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
 
+(define_constraint "D2"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)/2."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op,
+			GET_MODE_UNIT_BITSIZE (mode) / 2,
+			GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
+(define_constraint "D3"
+  "@internal
+ A constraint that matches vector of immediates that is with 0 to
+ (bits(mode)/2)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2b50e39aa8651b415443b5fd90f187c83d5591d5 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
 				HOST_WIDE_INT_1U
 				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
 
+(define_predicate "aarch64_simd_shift_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
@@ -155,4 +155,4 @@ int main ()
 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
-/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
+/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair*/
+/*
+** sshll_opt1:
+** 	...
+** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
+** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
+** 	...
+*/
+void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+/*
+** sshll_opt2:
+** 	...
+** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
+** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	...
+*/
+void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 15;
+        foo[i+2] = a[i+2] << 14;
+        foo[i+3] = a[i+3] << 17;
+    }
+}
+
+




-- 

[-- Attachment #2: rb17620.patch --]
[-- Type: text/plain, Size: 9154 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d95394101470446e55f25a2397dd112239b6a54d..afd5b8632afbcddf8dad14495c3446c560eb085d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6387,105 +6387,66 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
-(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
-  "TARGET_SIMD"
-  {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
-    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
-						     p, operands[2]));
-    DONE;
-  }
-)
-
-(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
-   [(set (match_operand:<VWIDE> 0 "register_operand")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "immediate_operand" "i")]
-			  VSHLL))]
-   "TARGET_SIMD"
-   {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
-    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
-						      p, operands[2]));
-    DONE;
-   }
-)
-
 ;; vshll_n
 
-(define_insn "aarch64_<sur>shll<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
+(define_insn "aarch64_<su>shll<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			    (match_operand:VD_BHSI 1 "register_operand"))
+			 (match_operand:<VWIDE> 2
+			   "aarch64_simd_shift_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  {@ [cons: =0, 1, 2]
+     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
+     [w, w, D3] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll2<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+(define_expand "aarch64_<sur>shll_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
 			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-                         VSHLL))]
+;; vshll_high_n
+
+(define_insn "aarch64_<su>shll2<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
+			 (match_operand:<VWIDE> 3
+			   "aarch64_simd_shift_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
+  {@ [cons: =0, 1, 2, 3]
+     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
+     [w, w, , D3] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-;; vshll_high_n
-
-(define_insn "aarch64_<sur>shll2_n<mode>"
+(define_expand "aarch64_<sur>shll2_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
 			 (match_operand:SI 2 "immediate_operand" "i")]
-                         VSHLL))]
+			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
 ;; vrshr_n
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..07613013622deb8797255cbf10c265080066565b 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -468,6 +468,22 @@ (define_constraint "D1"
 			GET_MODE_UNIT_BITSIZE (mode) - 1,
 			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
 
+(define_constraint "D2"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)/2."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op,
+			GET_MODE_UNIT_BITSIZE (mode) / 2,
+			GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
+(define_constraint "D3"
+  "@internal
+ A constraint that matches vector of immediates that is with 0 to
+ (bits(mode)/2)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2b50e39aa8651b415443b5fd90f187c83d5591d5 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
 				HOST_WIDE_INT_1U
 				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
 
+(define_predicate "aarch64_simd_shift_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
@@ -155,4 +155,4 @@ int main ()
 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
-/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
+/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair*/
+/*
+** sshll_opt1:
+** 	...
+** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
+** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
+** 	...
+*/
+void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+/*
+** sshll_opt2:
+** 	...
+** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
+** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	...
+*/
+void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 15;
+        foo[i+2] = a[i+2] << 14;
+        foo[i+3] = a[i+3] << 17;
+    }
+}
+
+




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346]
  2023-08-02 10:25 [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346] Tamar Christina
@ 2023-08-02 14:34 ` Richard Sandiford
  2023-08-03 12:32   ` Tamar Christina
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2023-08-02 14:34 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov

Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> In GCC 11 we implemented the vectorizer optab for widening left shifts,
> however this optab is only supported for uniform shift constants.
>
> At the moment GCC still has two loop vectorization strategy (classical loop and
> SLP based loop vec) and the optab is implemented as a scalar pattern.
>
> This means that when we apply it to a non-uniform constant inside a loop we only
> find out during SLP build that the constants aren't uniform.  At this point it's
> too late and we lose SLP entirely.
>
> Over the years I've tried various options but none of it works well:
>
> 1. Dissolving patterns during SLP built (problematic, also dissolves them for
> non-slp).
> 2. Optionally ignoring patterns for SLP build (problematic, ends up interfearing
> with relevancy detection).
> 3. Relaxing contraint on SLP build to allow non-constant values and dissolving
> them after SLP build using an SLP pattern.  (problematic, ends up breaking
> shift reassociation).
>
> As a result we've concluded that for now this pattern should just be removed
> and formed during RTL.
>
> The plan is to move this to an SLP only pattern once we remove classical loop
> vectorization support from GCC, at which time we can also properly support SVE's
> Top and Bottom variants.
>
> This removes the optab and reworks the RTL to recognize both the vector variant
> and the intrinsics variant.  Also just simplifies all these patterns.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR target/106346
> 	* config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
> 	vec_widen_<sur>shiftl_hi_<mode>): Remove.
> 	(aarch64_<sur>shll<mode>_internal): Renamed to...
> 	(aarch64_<su>shll<mode>): .. This.
> 	(aarch64_<sur>shll2<mode>_internal): Renamed to...
> 	(aarch64_<su>shll2<mode>): .. This.
> 	(aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
> 	optabs.
> 	* config/aarch64/constraints.md (D2, D3): New.
> 	* config/aarch64/predicates.md (aarch64_simd_shift_imm_vec): New.
>
> gcc/testsuite/ChangeLog:
>
> 	PR target/106346
> 	* gcc.target/aarch64/pr98772.c: Adjust assembly.
> 	* gcc.target/aarch64/vect-widen-shift.c: New test.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index d95394101470446e55f25a2397dd112239b6a54d..afd5b8632afbcddf8dad14495c3446c560eb085d 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6387,105 +6387,66 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
>    [(set_attr "type" "neon_sat_shift_reg<q>")]
>  )
>  
> -(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -			 VSHLL))]
> -  "TARGET_SIMD"
> -  {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> -    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
> -						     p, operands[2]));
> -    DONE;
> -  }
> -)
> -
> -(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
> -   [(set (match_operand:<VWIDE> 0 "register_operand")
> -	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "immediate_operand" "i")]
> -			  VSHLL))]
> -   "TARGET_SIMD"
> -   {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> -    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
> -						      p, operands[2]));
> -    DONE;
> -   }
> -)
> -
>  ;; vshll_n
>  
> -(define_insn "aarch64_<sur>shll<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(vec_select:<VHALF>
> -			    (match_operand:VQW 1 "register_operand" "w")
> -			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
> -			 (match_operand:SI 3
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -			 VSHLL))]
> +(define_insn "aarch64_<su>shll<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +			    (match_operand:VD_BHSI 1 "register_operand"))
> +			 (match_operand:<VWIDE> 2
> +			   "aarch64_simd_shift_imm_vec")))]

The name of this predicate seems more general than its meaning.
How about naming it aarch64_simd_shift_imm_vec_half_bitsize, to follow:

;; Predicates used by the various SIMD shift operations.  These
;; fall in to 3 categories.
;;   Shifts with a range 0-(bit_size - 1) (aarch64_simd_shift_imm)
;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)

Or aarch64_simd_shll_imm_vec, for something shorter.

>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> +  {@ [cons: =0, 1, 2]
> +     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
> +     [w, w, D3] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll2<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(vec_select:<VHALF>
> -			    (match_operand:VQW 1 "register_operand" "w")
> -			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
> -			 (match_operand:SI 3
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +(define_expand "aarch64_<sur>shll_n<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
> +			 (match_operand:SI 2
> +			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
>  			 VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll_n<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -                         VSHLL))]
> +;; vshll_high_n
> +
> +(define_insn "aarch64_<su>shll2<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +			  (vec_select:<VHALF>
> +			    (match_operand:VQW 1 "register_operand")
> +			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
> +			 (match_operand:<VWIDE> 3
> +			   "aarch64_simd_shift_imm_vec")))]
>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +  {@ [cons: =0, 1, 2, 3]
> +     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
> +     [w, w, , D3] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -;; vshll_high_n
> -
> -(define_insn "aarch64_<sur>shll2_n<mode>"
> +(define_expand "aarch64_<sur>shll2_n<mode>"
>    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>  	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
>  			 (match_operand:SI 2 "immediate_operand" "i")]

Pre-existing, but I think the predicate on operand 2 should be
aarch64_simd_shift_imm_bitsize_<ve_mode>, as above.  As it stands
we accept:

int64x2_t foo(int32x4_t x) { return vshll_high_n_s32(x, 33); }

and generate the invalid:

	sshll2  v0.2d, v0.4s, 33

I think after the patch we'd ICE instead.

The pattern shouldn't have any constraints, now that it's an expander.

> -                         VSHLL))]
> +			 VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
>  ;; vrshr_n
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..07613013622deb8797255cbf10c265080066565b 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -468,6 +468,22 @@ (define_constraint "D1"
>  			GET_MODE_UNIT_BITSIZE (mode) - 1,
>  			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
>  
> +(define_constraint "D2"
> +  "@internal
> + A constraint that matches vector of immediates that is bits(mode)/2."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_const_vec_all_same_in_range_p (op,
> +			GET_MODE_UNIT_BITSIZE (mode) / 2,
> +			GET_MODE_UNIT_BITSIZE (mode) / 2)")))

Looks like this can reuse aarch64_simd_shift_imm_vec_exact_top.

> +
> +(define_constraint "D3"
> +  "@internal
> + A constraint that matches vector of immediates that is with 0 to
> + (bits(mode)/2)-1."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))

Having this mapping for D2 and D3, with D2 corresponded to prec/2,
kind-of makes D3 a false mnemonic.  How about DL instead?  (L for
"left-shift long" or "low-part", take your pick)

Looks good otherwise.

Thanks,
Richard

> +
>  (define_constraint "Dr"
>    "@internal
>   A constraint that matches vector of immediates for right shifts."
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2b50e39aa8651b415443b5fd90f187c83d5591d5 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
>  				HOST_WIDE_INT_1U
>  				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
>  
> +(define_predicate "aarch64_simd_shift_imm_vec"
> +  (and (match_code "const_vector")
> +       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
> +
>  (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
>    (and (match_code "const_int")
>         (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> @@ -155,4 +155,4 @@ int main ()
>  /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
> -/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
> +/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +#include <stdint.h>
> +#include <string.h>
> +
> +#pragma GCC target "+nosve"
> +
> +#define ARR_SIZE 1024
> +
> +/* Should produce an shll,shll2 pair*/
> +/*
> +** sshll_opt1:
> +** 	...
> +** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
> +** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
> +** 	...
> +*/
> +void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 16;
> +        foo[i+2] = a[i+2] << 16;
> +        foo[i+3] = a[i+3] << 16;
> +    }
> +}
> +
> +/*
> +** sshll_opt2:
> +** 	...
> +** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
> +** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
> +** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +** 	...
> +*/
> +void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 15;
> +        foo[i+2] = a[i+2] << 14;
> +        foo[i+3] = a[i+3] << 17;
> +    }
> +}
> +
> +

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346]
  2023-08-02 14:34 ` Richard Sandiford
@ 2023-08-03 12:32   ` Tamar Christina
  2023-08-03 12:51     ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2023-08-03 12:32 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

[-- Attachment #1: Type: text/plain, Size: 11014 bytes --]

> > +
> > +(define_constraint "D3"
> > +  "@internal
> > + A constraint that matches vector of immediates that is with 0 to
> > +(bits(mode)/2)-1."
> > + (and (match_code "const,const_vector")
> > +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> > +			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
> 
> Having this mapping for D2 and D3, with D2 corresponded to prec/2, kind-of
> makes D3 a false mnemonic.  How about DL instead?  (L for "left-shift long" or
> "low-part", take your pick)
> 
> Looks good otherwise.
> 

Wasn't sure if this was an ok with changes or not, so here's the final patch 😊

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR target/106346
	* config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
	vec_widen_<sur>shiftl_hi_<mode>): Remove.
	(aarch64_<sur>shll<mode>_internal): Renamed to...
	(aarch64_<su>shll<mode>): .. This.
	(aarch64_<sur>shll2<mode>_internal): Renamed to...
	(aarch64_<su>shll2<mode>): .. This.
	(aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
	optabs.
	* config/aarch64/constraints.md (D2, DL): New.
	* config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New.

gcc/testsuite/ChangeLog:

	PR target/106346
	* gcc.target/aarch64/pr98772.c: Adjust assembly.
	* gcc.target/aarch64/vect-widen-shift.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6387,105 +6387,67 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
-(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
-  "TARGET_SIMD"
-  {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
-    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
-						     p, operands[2]));
-    DONE;
-  }
-)
-
-(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
-   [(set (match_operand:<VWIDE> 0 "register_operand")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "immediate_operand" "i")]
-			  VSHLL))]
-   "TARGET_SIMD"
-   {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
-    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
-						      p, operands[2]));
-    DONE;
-   }
-)
-
 ;; vshll_n
 
-(define_insn "aarch64_<sur>shll<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
+(define_insn "aarch64_<su>shll<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			    (match_operand:VD_BHSI 1 "register_operand"))
+			 (match_operand:<VWIDE> 2
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  {@ [cons: =0, 1, 2]
+     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
+     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll2<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+(define_expand "aarch64_<sur>shll_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
 			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-                         VSHLL))]
+;; vshll_high_n
+
+(define_insn "aarch64_<su>shll2<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
+			 (match_operand:<VWIDE> 3
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
+  {@ [cons: =0, 1, 2, 3]
+     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
+     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-;; vshll_high_n
-
-(define_insn "aarch64_<sur>shll2_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2 "immediate_operand" "i")]
-                         VSHLL))]
+(define_expand "aarch64_<sur>shll2_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
+			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
 ;; vrshr_n
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -468,6 +468,20 @@ (define_constraint "D1"
 			GET_MODE_UNIT_BITSIZE (mode) - 1,
 			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
 
+(define_constraint "D2"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)/2."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
+
+(define_constraint "DL"
+  "@internal
+ A constraint that matches vector of immediates for left shift long.
+ That is immediates between 0 to (bits(mode)/2)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
 				HOST_WIDE_INT_1U
 				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
 
+(define_predicate "aarch64_simd_shll_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
@@ -155,4 +155,4 @@ int main ()
 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
-/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
+/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair*/
+/*
+** sshll_opt1:
+** 	...
+** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
+** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
+** 	...
+*/
+void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+/*
+** sshll_opt2:
+** 	...
+** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
+** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	...
+*/
+void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 15;
+        foo[i+2] = a[i+2] << 14;
+        foo[i+3] = a[i+3] << 17;
+    }
+}
+
+

[-- Attachment #2: rb17620.patch --]
[-- Type: application/octet-stream, Size: 9303 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6387,105 +6387,67 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
-(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
-  "TARGET_SIMD"
-  {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
-    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
-						     p, operands[2]));
-    DONE;
-  }
-)
-
-(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
-   [(set (match_operand:<VWIDE> 0 "register_operand")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "immediate_operand" "i")]
-			  VSHLL))]
-   "TARGET_SIMD"
-   {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
-    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
-						      p, operands[2]));
-    DONE;
-   }
-)
-
 ;; vshll_n
 
-(define_insn "aarch64_<sur>shll<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
+(define_insn "aarch64_<su>shll<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			    (match_operand:VD_BHSI 1 "register_operand"))
+			 (match_operand:<VWIDE> 2
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  {@ [cons: =0, 1, 2]
+     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
+     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll2<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+(define_expand "aarch64_<sur>shll_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
 			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-                         VSHLL))]
+;; vshll_high_n
+
+(define_insn "aarch64_<su>shll2<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
+			 (match_operand:<VWIDE> 3
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
+  {@ [cons: =0, 1, 2, 3]
+     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
+     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-;; vshll_high_n
-
-(define_insn "aarch64_<sur>shll2_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2 "immediate_operand" "i")]
-                         VSHLL))]
+(define_expand "aarch64_<sur>shll2_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
+			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
 ;; vrshr_n
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -468,6 +468,20 @@ (define_constraint "D1"
 			GET_MODE_UNIT_BITSIZE (mode) - 1,
 			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
 
+(define_constraint "D2"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)/2."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
+
+(define_constraint "DL"
+  "@internal
+ A constraint that matches vector of immediates for left shift long.
+ That is immediates between 0 to (bits(mode)/2)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
 				HOST_WIDE_INT_1U
 				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
 
+(define_predicate "aarch64_simd_shll_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
@@ -155,4 +155,4 @@ int main ()
 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
-/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
+/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair*/
+/*
+** sshll_opt1:
+** 	...
+** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
+** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
+** 	...
+*/
+void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+/*
+** sshll_opt2:
+** 	...
+** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
+** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	...
+*/
+void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 15;
+        foo[i+2] = a[i+2] << 14;
+        foo[i+3] = a[i+3] << 17;
+    }
+}
+
+

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346]
  2023-08-03 12:32   ` Tamar Christina
@ 2023-08-03 12:51     ` Richard Sandiford
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2023-08-03 12:51 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Tamar Christina <Tamar.Christina@arm.com> writes:
>> > +
>> > +(define_constraint "D3"
>> > +  "@internal
>> > + A constraint that matches vector of immediates that is with 0 to
>> > +(bits(mode)/2)-1."
>> > + (and (match_code "const,const_vector")
>> > +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
>> > +			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
>> 
>> Having this mapping for D2 and D3, with D2 corresponded to prec/2, kind-of
>> makes D3 a false mnemonic.  How about DL instead?  (L for "left-shift long" or
>> "low-part", take your pick)
>> 
>> Looks good otherwise.
>> 
>
> Wasn't sure if this was an ok with changes or not, so here's the final patch 😊

I was hoping to have another look before it went in.  But...

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?

...yeah, LGTM, thanks.

Richard

> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR target/106346
> 	* config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
> 	vec_widen_<sur>shiftl_hi_<mode>): Remove.
> 	(aarch64_<sur>shll<mode>_internal): Renamed to...
> 	(aarch64_<su>shll<mode>): .. This.
> 	(aarch64_<sur>shll2<mode>_internal): Renamed to...
> 	(aarch64_<su>shll2<mode>): .. This.
> 	(aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
> 	optabs.
> 	* config/aarch64/constraints.md (D2, DL): New.
> 	* config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New.
>
> gcc/testsuite/ChangeLog:
>
> 	PR target/106346
> 	* gcc.target/aarch64/pr98772.c: Adjust assembly.
> 	* gcc.target/aarch64/vect-widen-shift.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6387,105 +6387,67 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
>    [(set_attr "type" "neon_sat_shift_reg<q>")]
>  )
>  
> -(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -			 VSHLL))]
> -  "TARGET_SIMD"
> -  {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> -    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
> -						     p, operands[2]));
> -    DONE;
> -  }
> -)
> -
> -(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
> -   [(set (match_operand:<VWIDE> 0 "register_operand")
> -	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "immediate_operand" "i")]
> -			  VSHLL))]
> -   "TARGET_SIMD"
> -   {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> -    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
> -						      p, operands[2]));
> -    DONE;
> -   }
> -)
> -
>  ;; vshll_n
>  
> -(define_insn "aarch64_<sur>shll<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(vec_select:<VHALF>
> -			    (match_operand:VQW 1 "register_operand" "w")
> -			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
> -			 (match_operand:SI 3
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -			 VSHLL))]
> +(define_insn "aarch64_<su>shll<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +			    (match_operand:VD_BHSI 1 "register_operand"))
> +			 (match_operand:<VWIDE> 2
> +			   "aarch64_simd_shll_imm_vec")))]
>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> +  {@ [cons: =0, 1, 2]
> +     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
> +     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll2<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(vec_select:<VHALF>
> -			    (match_operand:VQW 1 "register_operand" "w")
> -			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
> -			 (match_operand:SI 3
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +(define_expand "aarch64_<sur>shll_n<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
> +			 (match_operand:SI 2
> +			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
>  			 VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll_n<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
> -			 (match_operand:SI 2
> -			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -                         VSHLL))]
> +;; vshll_high_n
> +
> +(define_insn "aarch64_<su>shll2<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +			  (vec_select:<VHALF>
> +			    (match_operand:VQW 1 "register_operand")
> +			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
> +			 (match_operand:<VWIDE> 3
> +			   "aarch64_simd_shll_imm_vec")))]
>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +  {@ [cons: =0, 1, 2, 3]
> +     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
> +     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -;; vshll_high_n
> -
> -(define_insn "aarch64_<sur>shll2_n<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -			 (match_operand:SI 2 "immediate_operand" "i")]
> -                         VSHLL))]
> +(define_expand "aarch64_<sur>shll2_n<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
> +			 (match_operand:SI 2
> +			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
> +			 VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
>  ;; vrshr_n
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -468,6 +468,20 @@ (define_constraint "D1"
>  			GET_MODE_UNIT_BITSIZE (mode) - 1,
>  			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
>  
> +(define_constraint "D2"
> +  "@internal
> + A constraint that matches vector of immediates that is bits(mode)/2."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
> +
> +(define_constraint "DL"
> +  "@internal
> + A constraint that matches vector of immediates for left shift long.
> + That is immediates between 0 to (bits(mode)/2)-1."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
> +
>  (define_constraint "Dr"
>    "@internal
>   A constraint that matches vector of immediates for right shifts."
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
>  				HOST_WIDE_INT_1U
>  				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
>  
> +(define_predicate "aarch64_simd_shll_imm_vec"
> +  (and (match_code "const_vector")
> +       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
> +
>  (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
>    (and (match_code "const_int")
>         (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> @@ -155,4 +155,4 @@ int main ()
>  /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
> -/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
> +/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +#include <stdint.h>
> +#include <string.h>
> +
> +#pragma GCC target "+nosve"
> +
> +#define ARR_SIZE 1024
> +
> +/* Should produce an shll,shll2 pair*/
> +/*
> +** sshll_opt1:
> +** 	...
> +** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
> +** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
> +** 	...
> +*/
> +void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 16;
> +        foo[i+2] = a[i+2] << 16;
> +        foo[i+3] = a[i+3] << 16;
> +    }
> +}
> +
> +/*
> +** sshll_opt2:
> +** 	...
> +** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
> +** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
> +** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +** 	...
> +*/
> +void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 15;
> +        foo[i+2] = a[i+2] << 14;
> +        foo[i+3] = a[i+3] << 17;
> +    }
> +}
> +
> +

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-08-03 12:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 10:25 [PATCH]AArch64 Undo vec_widen_<sur>shiftl optabs [PR106346] Tamar Christina
2023-08-02 14:34 ` Richard Sandiford
2023-08-03 12:32   ` Tamar Christina
2023-08-03 12:51     ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).