[gcc r14-2991] AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346]

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r14-2991] AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346]
@ 2023-08-04 12:50 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2023-08-04 12:50 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:451391a6477f5b012faeca42cdba1bfb8e6eecc0

commit r14-2991-g451391a6477f5b012faeca42cdba1bfb8e6eecc0
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Fri Aug 4 13:49:23 2023 +0100

    AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346]
    
    In GCC 11 we implemented the vectorizer optab for widening left shifts,
    however this optab is only supported for uniform shift constants.
    
    At the moment GCC still has two loop vectorization strategy (classical loop and
    SLP based loop vec) and the optab is implemented as a scalar pattern.
    
    This means that when we apply it to a non-uniform constant inside a loop we only
    find out during SLP build that the constants aren't uniform.  At this point it's
    too late and we lose SLP entirely.
    
    Over the years I've tried various options but none of it works well:
    
    1. Dissolving patterns during SLP built (problematic, also dissolves them for
    non-slp).
    2. Optionally ignoring patterns for SLP build (problematic, ends up interfearing
    with relevancy detection).
    3. Relaxing contraint on SLP build to allow non-constant values and dissolving
    them after SLP build using an SLP pattern.  (problematic, ends up breaking
    shift reassociation).
    
    As a result we've concluded that for now this pattern should just be removed
    and formed during RTL.
    
    The plan is to move this to an SLP only pattern once we remove classical loop
    vectorization support from GCC, at which time we can also properly support SVE's
    Top and Bottom variants.
    
    This removes the optab and reworks the RTL to recognize both the vector variant
    and the intrinsics variant.  Also just simplifies all these patterns.
    
    gcc/ChangeLog:
    
            PR target/106346
            * config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
            vec_widen_<sur>shiftl_hi_<mode>): Remove.
            (aarch64_<sur>shll<mode>_internal): Renamed to...
            (aarch64_<su>shll<mode>): .. This.
            (aarch64_<sur>shll2<mode>_internal): Renamed to...
            (aarch64_<su>shll2<mode>): .. This.
            (aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
            optabs.
            * config/aarch64/constraints.md (D2, DL): New.
            * config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New.
    
    gcc/testsuite/ChangeLog:
    
            PR target/106346
            * gcc.target/aarch64/pr98772.c: Adjust assembly.
            * gcc.target/aarch64/vect-widen-shift.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 | 118 +++++++--------------
 gcc/config/aarch64/constraints.md                  |  14 +++
 gcc/config/aarch64/predicates.md                   |   5 +
 gcc/testsuite/gcc.target/aarch64/pr98772.c         |   2 +-
 .../gcc.target/aarch64/vect-widen-shift.c          |  50 +++++++++
 5 files changed, 110 insertions(+), 79 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d9539410147..f67eb70577d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6387,105 +6387,67 @@
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
-(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
-  "TARGET_SIMD"
-  {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
-    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
-						     p, operands[2]));
-    DONE;
-  }
-)
-
-(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
-   [(set (match_operand:<VWIDE> 0 "register_operand")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "immediate_operand" "i")]
-			  VSHLL))]
-   "TARGET_SIMD"
-   {
-    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
-    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
-						      p, operands[2]));
-    DONE;
-   }
-)
-
 ;; vshll_n
 
-(define_insn "aarch64_<sur>shll<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-			 VSHLL))]
+(define_insn "aarch64_<su>shll<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			    (match_operand:VD_BHSI 1 "register_operand"))
+			 (match_operand:<VWIDE> 2
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  {@ [cons: =0, 1, 2]
+     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
+     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll2<mode>_internal"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(vec_select:<VHALF>
-			    (match_operand:VQW 1 "register_operand" "w")
-			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
-			 (match_operand:SI 3
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+(define_expand "aarch64_<sur>shll_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
 			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "aarch64_<sur>shll_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
-			 (match_operand:SI 2
-			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
-                         VSHLL))]
+;; vshll_high_n
+
+(define_insn "aarch64_<su>shll2<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half")))
+			 (match_operand:<VWIDE> 3
+			   "aarch64_simd_shll_imm_vec")))]
   "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
+  {@ [cons: =0, 1, 2, 3]
+     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
+     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
   }
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-;; vshll_high_n
-
-(define_insn "aarch64_<sur>shll2_n<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
-			 (match_operand:SI 2 "immediate_operand" "i")]
-                         VSHLL))]
+(define_expand "aarch64_<sur>shll2_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
+			 VSHLL))]
   "TARGET_SIMD"
   {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
-      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
-    else
-      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
+    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
+    DONE;
   }
-  [(set_attr "type" "neon_shift_imm_long")]
 )
 
 ;; vrshr_n
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a8..371a00827d8 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -468,6 +468,20 @@
 			GET_MODE_UNIT_BITSIZE (mode) - 1,
 			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
 
+(define_constraint "D2"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)/2."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
+
+(define_constraint "DL"
+  "@internal
+ A constraint that matches vector of immediates for left shift long.
+ That is immediates between 0 to (bits(mode)/2)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+			(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf..2d8d1fe25c1 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -638,6 +638,11 @@
 				HOST_WIDE_INT_1U
 				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
 
+(define_predicate "aarch64_simd_shll_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
+				GET_MODE_UNIT_BITSIZE (mode) / 2)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
index 8259251a7c0..52ad012dcfe 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
@@ -155,4 +155,4 @@ int main ()
 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
-/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
+/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
new file mode 100644
index 00000000000..6ee41f63ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair*/
+/*
+** sshll_opt1:
+** 	...
+** 	shll	v[0-9]+.4s, v[0-9]+.4h, 16
+** 	shll2	v[0-9]+.4s, v[0-9]+.8h, 16
+** 	...
+*/
+void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+/*
+** sshll_opt2:
+** 	...
+** 	sxtl	v[0-9]+.4s, v[0-9]+.4h
+** 	sxtl2	v[0-9]+.4s, v[0-9]+.8h
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	sshl	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** 	...
+*/
+void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   << 16;
+        foo[i+1] = a[i+1] << 15;
+        foo[i+2] = a[i+2] << 14;
+        foo[i+3] = a[i+3] << 17;
+    }
+}
+
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-04 12:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-04 12:50 [gcc r14-2991] AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346] Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).