public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Re: [Aarch64] Use vector wide add for mixed-mode adds
@ 2015-11-09  6:52 Michael Collison
  2015-11-22 16:13 ` James Greenhalgh
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Collison @ 2015-11-09  6:52 UTC (permalink / raw)
  To: gcc Patches, Richard Biener, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 1804 bytes --]

This is a followup patch to my earlier patch here:

https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00408.html

and comments here:

https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01300.html

This patch fixes the failure in slp-reduc-3.c by adding aarch64 support in
check_effective_target_vect_widen_sum_hi_to_si_pattern in 
target-supports.exp.
The remaining failures in slp-multitypes-[45].c and vect-125.c appear to 
be deficiencies in
the vectorizer, as the same failures are seen on PowerPC and ia64. See here:

PowerPC: https://gcc.gnu.org/ml/gcc-testresults/2015-10/msg03293.html
ia64: https://gcc.gnu.org/ml/gcc-testresults/2015-10/msg03176.html

Thanks to James Greenhalgh at Arm for pointing this out. My patch 
disables these tests for targets with
widening adds that support V8HI to V4SI. Tested on aarch64-none-elf, 
aarch64_be-none-elf, and aarch64-none-linux-gnu.

2015-11-06  Michael Collison <Michael.Collison@linaro.org>
     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns.
     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
     * gcc.target/aarch64/saddw-1.c: New test.
     * gcc.target/aarch64/saddw-2.c: New test.
     * gcc.target/aarch64/uaddw-1.c: New test.
     * gcc.target/aarch64/uaddw-2.c: New test.
     * gcc.target/aarch64/uaddw-3.c: New test.
     * gcc.dg/vect/slp-multitypes-4.c: Disable test for
     targets with widening adds from V8HI=>V4SI.
     * gcc.dg/vect/slp-multitypes-5.c: Ditto.
     * gcc.dg/vect/vect-125.c: Ditto.
     * lib/target-supports.exp
     (check_effective_target_vect_widen_sum_hi_to_si_pattern):
     Add aarch64 to list of supported targets.

Okay to commit?

-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org


[-- Attachment #2: tcwg-833-aarch64-upstream.patch --]
[-- Type: text/x-patch, Size: 9961 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 65a2b6f..acb7cf0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2750,6 +2750,60 @@
 
 ;; <su><addsub>w<q>.
 
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+	(plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))
+		      (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
+						operands[1], p));
+    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+	(plus:<VWIDE> (sign_extend:<VWIDE>
+		       (match_operand:VD_BHSI 1 "register_operand" ""))
+		      (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
+  DONE;
+})
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+	(plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))
+		      (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
+						 operands[1], p));
+    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+	(plus:<VWIDE> (zero_extend:<VWIDE>
+		       (match_operand:VD_BHSI 1 "register_operand" ""))
+		      (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
+  DONE;
+})
+
 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
@@ -2760,6 +2814,18 @@
   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
 )
 
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+			(ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			   (match_operand:VQW 2 "register_operand" "w")
+			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+)
+
 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 964f8f1..f851dca 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -455,6 +455,13 @@
 			 (V4SF "V2SF")  (V4HF "V2HF")
 			 (V8HF "V4HF")  (V2DF  "DF")])
 
+;; Half modes of all vector modes, in lower-case.
+(define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
+			 (V4HI "v2hi")  (V8HI  "v4hi")
+			 (V2SI "si")    (V4SI  "v2si")
+			 (V2DI "di")    (V2SF  "sf")
+			 (V4SF "v2sf")  (V2DF  "df")])
+
 ;; Double modes of vector modes.
 (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
 			(V4HF "V8HF")
@@ -472,6 +479,11 @@
 			(SI   "v2si")  (DI   "v2di")
 			(DF   "v2df")])
 
+;; Modes with double-width elements.
+(define_mode_attr VDBLW [(V8QI "V4HI") (V16QI "V8HI")
+                  (V4HI "V2SI") (V8HI "V4SI")
+                  (V2SI "DI")   (V4SI "V2DI")])
+
 ;; Narrowed modes for VDN.
 (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
 			    (DI   "V2SI")])
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
index faf17d6..fa3b9e2 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_unpack } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
@@ -51,6 +52,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_widen_sum_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_widen_sum_hi_to_si_pattern } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
index fb4f720..42a3b4b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
@@ -51,6 +52,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_widen_sum_hi_to_si_pattern  } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_widen_sum_hi_to_si_pattern } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/vect-125.c b/gcc/testsuite/gcc.dg/vect/vect-125.c
index 4a3c0dc..a1d1e88 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-125.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-125.c
@@ -16,4 +16,4 @@ void train(short *t, short *w, int n, int err)
     }
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail vect_no_int_min_max } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { vect_widen_sum_hi_to_si_pattern || vect_no_int_min_max } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
new file mode 100644
index 0000000..9db5d00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int 
+t6(int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;
+  int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
+
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
new file mode 100644
index 0000000..6f8c8fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;
+  long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
new file mode 100644
index 0000000..e34574f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int 
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
new file mode 100644
index 0000000..fd3b578
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
new file mode 100644
index 0000000..04bc7c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int 
+t6(int len, void * dummy, char * __restrict x)
+{
+  len = len & ~31;
+  unsigned short result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
+
+
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b543519..46f41a1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3943,6 +3943,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
     } else {
         set et_vect_widen_sum_hi_to_si_pattern_saved 0
         if { [istarget powerpc*-*-*]
+              || [istarget aarch64*-*-*]
              || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_pattern_saved 1
         }
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
  2015-11-09  6:52 [Aarch64] Use vector wide add for mixed-mode adds Michael Collison
@ 2015-11-22 16:13 ` James Greenhalgh
  2015-11-23  2:46   ` Michael Collison
  0 siblings, 1 reply; 8+ messages in thread
From: James Greenhalgh @ 2015-11-22 16:13 UTC (permalink / raw)
  To: Michael Collison; +Cc: gcc Patches, Richard Biener

On Sun, Nov 08, 2015 at 11:51:47PM -0700, Michael Collison wrote:
> 2015-11-06  Michael Collison <Michael.Collison@linaro.org>
>     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
>     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
>     * gcc.target/aarch64/saddw-1.c: New test.
>     * gcc.target/aarch64/saddw-2.c: New test.
>     * gcc.target/aarch64/uaddw-1.c: New test.
>     * gcc.target/aarch64/uaddw-2.c: New test.
>     * gcc.target/aarch64/uaddw-3.c: New test.
>     * lib/target-support.exp
>     (check_effective_target_vect_widen_sum_hi_to_si_pattern):
>     Add aarch64 to list of support targets.


These hunks are all OK (with the minor style comments below applied).

As we understand what's happening here, let's take the regressions below
for now and add AArch64 to the targets affected by pr68333.

>     * gcc.dg/vect/slp-multitypes-4.c: Disable test for
>     targets with widening adds from V8HI=>V4SI.
>     * gcc.dg/vect/slp-multitypes-5.c: Ditto.
>     * gcc.dg/vect/vect-125.c: Ditto.

Let's leave these for now, while we wait for pr68333.

> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 65a2b6f..acb7cf0 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -2750,6 +2750,60 @@
>  
>  ;; <su><addsub>w<q>.
>  
> +(define_expand "widen_ssum<mode>3"
> +  [(set (match_operand:<VDBLW> 0 "register_operand" "")
> +	(plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))

Split this line (more than 80 characters).

> +		      (match_operand:<VDBLW> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +  {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
> +
> +    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
> +						operands[1], p));
> +    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
> +    DONE;
> +  }
> +)
> +
> +(define_expand "widen_ssum<mode>3"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
> +	(plus:<VWIDE> (sign_extend:<VWIDE>
> +		       (match_operand:VD_BHSI 1 "register_operand" ""))
> +		      (match_operand:<VWIDE> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +{
> +  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
> +  DONE;
> +})
> +
> +(define_expand "widen_usum<mode>3"
> +  [(set (match_operand:<VDBLW> 0 "register_operand" "")
> +	(plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))

Split this line (more than 80 characters).

> +		      (match_operand:<VDBLW> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +  {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
> +
> +    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
> +						 operands[1], p));
> +    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
> +    DONE;
> +  }
> +)
> +
> +(define_expand "widen_usum<mode>3"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
> +	(plus:<VWIDE> (zero_extend:<VWIDE>
> +		       (match_operand:VD_BHSI 1 "register_operand" ""))
> +		      (match_operand:<VWIDE> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +{
> +  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
> +  DONE;
> +})
> +
>  (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
>    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>          (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
> @@ -2760,6 +2814,18 @@
>    [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
>  )
>  
> +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> +        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
> +			(ANY_EXTEND:<VWIDE>
> +			  (vec_select:<VHALF>
> +			   (match_operand:VQW 2 "register_operand" "w")
> +			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
> +  "TARGET_SIMD"
> +  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
> +  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
> +)
> +
>  (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
>    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>          (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")

> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
> new file mode 100644
> index 0000000..9db5d00
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +

Extra newline.

> +int 
> +t6(int len, void * dummy, short * __restrict x)
> +{
> +  len = len & ~31;
> +  int result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "saddw" } } */
> +/* { dg-final { scan-assembler "saddw2" } } */
> +
> +
> +

Trailing newlines.

> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
> new file mode 100644
> index 0000000..6f8c8fd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int 
> +t6(int len, void * dummy, int * __restrict x)
> +{
> +  len = len & ~31;
> +  long long result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "saddw" } } */
> +/* { dg-final { scan-assembler "saddw2" } } */
> +
> +

Trailing newlines.

> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
> new file mode 100644
> index 0000000..e34574f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +

Extra newline.

> +int 
> +t6(int len, void * dummy, unsigned short * __restrict x)
> +{
> +  len = len & ~31;
> +  unsigned int result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "uaddw" } } */
> +/* { dg-final { scan-assembler "uaddw2" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
> new file mode 100644
> index 0000000..04bc7c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +

Extra newline.

> +
> +int 
> +t6(int len, void * dummy, char * __restrict x)
> +{
> +  len = len & ~31;
> +  unsigned short result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "uaddw" } } */
> +/* { dg-final { scan-assembler "uaddw2" } } */
> +
> +
> +

Trailing newlines.

> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index b543519..46f41a1 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -3943,6 +3943,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
>      } else {
>          set et_vect_widen_sum_hi_to_si_pattern_saved 0
>          if { [istarget powerpc*-*-*]
> +              || [istarget aarch64*-*-*]
>               || [istarget ia64-*-*] } {

Either line ia64 up with aarch64, or line aarch64 up with ia64.

Thanks,
James

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
  2015-11-22 16:13 ` James Greenhalgh
@ 2015-11-23  2:46   ` Michael Collison
  2015-11-23  9:21     ` James Greenhalgh
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Collison @ 2015-11-23  2:46 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc Patches, Richard Biener



On 11/22/2015 8:48 AM, James Greenhalgh wrote:
> On Sun, Nov 08, 2015 at 11:51:47PM -0700, Michael Collison wrote:
>> 2015-11-06  Michael Collison <Michael.Collison@linaro.org>
>>      * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
>> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
>>      * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
>>      * gcc.target/aarch64/saddw-1.c: New test.
>>      * gcc.target/aarch64/saddw-2.c: New test.
>>      * gcc.target/aarch64/uaddw-1.c: New test.
>>      * gcc.target/aarch64/uaddw-2.c: New test.
>>      * gcc.target/aarch64/uaddw-3.c: New test.
>>      * lib/target-support.exp
>>      (check_effective_target_vect_widen_sum_hi_to_si_pattern):
>>      Add aarch64 to list of support targets.
>
> These hunks are all OK (with the minor style comments below applied).

Okay I will update with your comments.
>
> As we understand what's happening here, let's take the regressions below
> for now and add AArch64 to the targets affected by pr68333.
>
>>      * gcc.dg/vect/slp-multitypes-4.c: Disable test for
>>      targets with widening adds from V8HI=>V4SI.
>>      * gcc.dg/vect/slp-multitypes-5.c: Ditto.
>>      * gcc.dg/vect/vect-125.c: Ditto.
> Let's leave these for now, while we wait for pr68333.

To clarify you would like me to exclude these bits from the patch?

>
>> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
>> index 65a2b6f..acb7cf0 100644
>> --- a/gcc/config/aarch64/aarch64-simd.md
>> +++ b/gcc/config/aarch64/aarch64-simd.md
>> @@ -2750,6 +2750,60 @@
>>   
>>   ;; <su><addsub>w<q>.
>>   
>> +(define_expand "widen_ssum<mode>3"
>> +  [(set (match_operand:<VDBLW> 0 "register_operand" "")
>> +	(plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))
> Split this line (more than 80 characters).
>
>> +		      (match_operand:<VDBLW> 2 "register_operand" "")))]
>> +  "TARGET_SIMD"
>> +  {
>> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
>> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
>> +
>> +    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
>> +						operands[1], p));
>> +    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
>> +    DONE;
>> +  }
>> +)
>> +
>> +(define_expand "widen_ssum<mode>3"
>> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
>> +	(plus:<VWIDE> (sign_extend:<VWIDE>
>> +		       (match_operand:VD_BHSI 1 "register_operand" ""))
>> +		      (match_operand:<VWIDE> 2 "register_operand" "")))]
>> +  "TARGET_SIMD"
>> +{
>> +  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
>> +  DONE;
>> +})
>> +
>> +(define_expand "widen_usum<mode>3"
>> +  [(set (match_operand:<VDBLW> 0 "register_operand" "")
>> +	(plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1 "register_operand" ""))
> Split this line (more than 80 characters).
>
>> +		      (match_operand:<VDBLW> 2 "register_operand" "")))]
>> +  "TARGET_SIMD"
>> +  {
>> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
>> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
>> +
>> +    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
>> +						 operands[1], p));
>> +    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
>> +    DONE;
>> +  }
>> +)
>> +
>> +(define_expand "widen_usum<mode>3"
>> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
>> +	(plus:<VWIDE> (zero_extend:<VWIDE>
>> +		       (match_operand:VD_BHSI 1 "register_operand" ""))
>> +		      (match_operand:<VWIDE> 2 "register_operand" "")))]
>> +  "TARGET_SIMD"
>> +{
>> +  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
>> +  DONE;
>> +})
>> +
>>   (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
>>     [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>>           (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
>> @@ -2760,6 +2814,18 @@
>>     [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
>>   )
>>   
>> +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
>> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>> +        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
>> +			(ANY_EXTEND:<VWIDE>
>> +			  (vec_select:<VHALF>
>> +			   (match_operand:VQW 2 "register_operand" "w")
>> +			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
>> +  "TARGET_SIMD"
>> +  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
>> +  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
>> +)
>> +
>>   (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
>>     [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>>           (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
>> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
>> new file mode 100644
>> index 0000000..9db5d00
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +
> Extra newline.
>
>> +int
>> +t6(int len, void * dummy, short * __restrict x)
>> +{
>> +  len = len & ~31;
>> +  int result = 0;
>> +  __asm volatile ("");
>> +  for (int i = 0; i < len; i++)
>> +    result += x[i];
>> +  return result;
>> +}
>> +
>> +/* { dg-final { scan-assembler "saddw" } } */
>> +/* { dg-final { scan-assembler "saddw2" } } */
>> +
>> +
>> +
> Trailing newlines.
>
>> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
>> new file mode 100644
>> index 0000000..6f8c8fd
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
>> @@ -0,0 +1,18 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +int
>> +t6(int len, void * dummy, int * __restrict x)
>> +{
>> +  len = len & ~31;
>> +  long long result = 0;
>> +  __asm volatile ("");
>> +  for (int i = 0; i < len; i++)
>> +    result += x[i];
>> +  return result;
>> +}
>> +
>> +/* { dg-final { scan-assembler "saddw" } } */
>> +/* { dg-final { scan-assembler "saddw2" } } */
>> +
>> +
> Trailing newlines.
>
>> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
>> new file mode 100644
>> index 0000000..e34574f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +
> Extra newline.
>
>> +int
>> +t6(int len, void * dummy, unsigned short * __restrict x)
>> +{
>> +  len = len & ~31;
>> +  unsigned int result = 0;
>> +  __asm volatile ("");
>> +  for (int i = 0; i < len; i++)
>> +    result += x[i];
>> +  return result;
>> +}
>> +
>> +/* { dg-final { scan-assembler "uaddw" } } */
>> +/* { dg-final { scan-assembler "uaddw2" } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
>> new file mode 100644
>> index 0000000..04bc7c9
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
> Extra newline.
>
>> +
>> +int
>> +t6(int len, void * dummy, char * __restrict x)
>> +{
>> +  len = len & ~31;
>> +  unsigned short result = 0;
>> +  __asm volatile ("");
>> +  for (int i = 0; i < len; i++)
>> +    result += x[i];
>> +  return result;
>> +}
>> +
>> +/* { dg-final { scan-assembler "uaddw" } } */
>> +/* { dg-final { scan-assembler "uaddw2" } } */
>> +
>> +
>> +
> Trailing newlines.
>
>> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
>> index b543519..46f41a1 100644
>> --- a/gcc/testsuite/lib/target-supports.exp
>> +++ b/gcc/testsuite/lib/target-supports.exp
>> @@ -3943,6 +3943,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
>>       } else {
>>           set et_vect_widen_sum_hi_to_si_pattern_saved 0
>>           if { [istarget powerpc*-*-*]
>> +              || [istarget aarch64*-*-*]
>>                || [istarget ia64-*-*] } {
> Either line ia64 up with aarch64, or line aarch64 up with ia64.
>
> Thanks,
> James
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
  2015-11-23  2:46   ` Michael Collison
@ 2015-11-23  9:21     ` James Greenhalgh
  0 siblings, 0 replies; 8+ messages in thread
From: James Greenhalgh @ 2015-11-23  9:21 UTC (permalink / raw)
  To: Michael Collison; +Cc: gcc Patches, Richard Biener

On Sun, Nov 22, 2015 at 06:24:19PM -0700, Michael Collison wrote:
> 
> 
> On 11/22/2015 8:48 AM, James Greenhalgh wrote:
> >On Sun, Nov 08, 2015 at 11:51:47PM -0700, Michael Collison wrote:
> >>2015-11-06  Michael Collison <Michael.Collison@linaro.org>
> >>     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
> >>(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
> >>     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
> >>     * gcc.target/aarch64/saddw-1.c: New test.
> >>     * gcc.target/aarch64/saddw-2.c: New test.
> >>     * gcc.target/aarch64/uaddw-1.c: New test.
> >>     * gcc.target/aarch64/uaddw-2.c: New test.
> >>     * gcc.target/aarch64/uaddw-3.c: New test.
> >>     * lib/target-support.exp
> >>     (check_effective_target_vect_widen_sum_hi_to_si_pattern):
> >>     Add aarch64 to list of support targets.
> >
> >These hunks are all OK (with the minor style comments below applied).
> 
> Okay I will update with your comments.
> >
> >As we understand what's happening here, let's take the regressions below
> >for now and add AArch64 to the targets affected by pr68333.
> >
> >>     * gcc.dg/vect/slp-multitypes-4.c: Disable test for
> >>     targets with widening adds from V8HI=>V4SI.
> >>     * gcc.dg/vect/slp-multitypes-5.c: Ditto.
> >>     * gcc.dg/vect/vect-125.c: Ditto.
> >Let's leave these for now, while we wait for pr68333.
> 
> To clarify you would like me to exclude these bits from the patch?

Yes, given the direction that pr68333 is going (a bug that should be
fixed, rather than an expected failure) that seems best to me.

Thanks,
James
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
  2015-11-24  9:36 Michael Collison
@ 2015-11-24 10:58 ` James Greenhalgh
  0 siblings, 0 replies; 8+ messages in thread
From: James Greenhalgh @ 2015-11-24 10:58 UTC (permalink / raw)
  To: Michael Collison; +Cc: GCC Patches

On Tue, Nov 24, 2015 at 02:24:30AM -0700, Michael Collison wrote:
> This is a followup patch which addresses formatting comments posted here:
> 
> https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02611.html
> 
> 2015-11-24  Michael Collison <Michael.Collison@linaro.org>
>     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
>     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
>     * gcc.target/aarch64/saddw-1.c: New test.
>     * gcc.target/aarch64/saddw-2.c: New test.
>     * gcc.target/aarch64/uaddw-1.c: New test.
>     * gcc.target/aarch64/uaddw-2.c: New test.
>     * gcc.target/aarch64/uaddw-3.c: New test.
>     * lib/target-support.exp
>     (check_effective_target_vect_widen_sum_hi_to_si_pattern):
>     Add aarch64 to list of support targets.
> 
> Okay to commit?

OK.

Thanks,
James

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
@ 2015-11-24  9:36 Michael Collison
  2015-11-24 10:58 ` James Greenhalgh
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Collison @ 2015-11-24  9:36 UTC (permalink / raw)
  To: GCC Patches; +Cc: James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 869 bytes --]

This is a followup patch which addresses formatting comments posted here:

https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02611.html

2015-11-24  Michael Collison <Michael.Collison@linaro.org>
     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
     * gcc.target/aarch64/saddw-1.c: New test.
     * gcc.target/aarch64/saddw-2.c: New test.
     * gcc.target/aarch64/uaddw-1.c: New test.
     * gcc.target/aarch64/uaddw-2.c: New test.
     * gcc.target/aarch64/uaddw-3.c: New test.
     * lib/target-supports.exp
     (check_effective_target_vect_widen_sum_hi_to_si_pattern):
     Add aarch64 to list of supported targets.

Okay to commit?

-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org


[-- Attachment #2: bugzilla_67322_nov242015.patch --]
[-- Type: text/x-patch, Size: 7628 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3fa23b3..79be6be 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2777,6 +2777,62 @@
 
 ;; <su><addsub>w<q>.
 
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+	(plus:<VDBLW> (sign_extend:<VDBLW> 
+		        (match_operand:VQW 1 "register_operand" ""))
+		      (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
+						operands[1], p));
+    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+	(plus:<VWIDE> (sign_extend:<VWIDE>
+		        (match_operand:VD_BHSI 1 "register_operand" ""))
+		      (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
+  DONE;
+})
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+	(plus:<VDBLW> (zero_extend:<VDBLW> 
+		        (match_operand:VQW 1 "register_operand" ""))
+		      (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
+						 operands[1], p));
+    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+	(plus:<VWIDE> (zero_extend:<VWIDE>
+		        (match_operand:VD_BHSI 1 "register_operand" ""))
+		      (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
+  DONE;
+})
+
 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
@@ -2787,6 +2843,18 @@
   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
 )
 
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+			(ANY_EXTEND:<VWIDE>
+			  (vec_select:<VHALF>
+			   (match_operand:VQW 2 "register_operand" "w")
+			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+)
+
 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c2eb7de..02e930b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -479,6 +479,13 @@
 			 (V4SF "V2SF")  (V4HF "V2HF")
 			 (V8HF "V4HF")  (V2DF  "DF")])
 
+;; Half modes of all vector modes, in lower-case.
+(define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
+			 (V4HI "v2hi")  (V8HI  "v4hi")
+			 (V2SI "si")    (V4SI  "v2si")
+			 (V2DI "di")    (V2SF  "sf")
+			 (V4SF "v2sf")  (V2DF  "df")])
+
 ;; Double modes of vector modes.
 (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
 			(V4HF "V8HF")
@@ -496,6 +503,11 @@
 			(SI   "v2si")  (DI   "v2di")
 			(DF   "v2df")])
 
+;; Modes with double-width elements.
+(define_mode_attr VDBLW [(V8QI "V4HI") (V16QI "V8HI")
+                  (V4HI "V2SI") (V8HI "V4SI")
+                  (V2SI "DI")   (V4SI "V2DI")])
+
 ;; Narrowed modes for VDN.
 (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
 			    (DI   "V2SI")])
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
new file mode 100644
index 0000000..7500fb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;
+  int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
new file mode 100644
index 0000000..5d9c8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;
+  long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
new file mode 100644
index 0000000..3d55ecf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
new file mode 100644
index 0000000..fd3b578
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
new file mode 100644
index 0000000..499af51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int 
+t6(int len, void * dummy, char * __restrict x)
+{
+  len = len & ~31;
+  unsigned short result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3eb46f2..254c4e3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4163,6 +4163,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
     } else {
         set et_vect_widen_sum_hi_to_si_pattern_saved 0
         if { [istarget powerpc*-*-*]
+             || [istarget aarch64*-*-*]
              || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_pattern_saved 1
         }
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Aarch64] Use vector wide add for mixed-mode adds
  2015-09-07  8:35 Michael Collison
@ 2015-09-17 15:52 ` James Greenhalgh
  0 siblings, 0 replies; 8+ messages in thread
From: James Greenhalgh @ 2015-09-17 15:52 UTC (permalink / raw)
  To: Michael Collison; +Cc: GCC Patches

On Mon, Sep 07, 2015 at 06:54:30AM +0100, Michael Collison wrote:
> This patch is designed to address code that was not being vectorized due
> to missing widening patterns in the aarch64 backend. Code such as:
> 
> int t6(int len, void * dummy, short * __restrict x)
> {
>    len = len & ~31;
>    int result = 0;
>    __asm volatile ("");
>    for (int i = 0; i < len; i++)
>      result += x[i];
>    return result;
> }
> 
> Validated on aarch64-none-elf, aarch64_be-none-elf, and
> aarch64-none-linus-gnu.
> 
> Note that there are three non-execution tree dump vectorization
> regressions where previously code was being vectorized.  They are:

I'd like to understand these better before taking the patch...

> 
> Passed now fails          [PASS => FAIL]:
>    gcc.dg/vect/slp-multitypes-4.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorized 1 loops" 1
>    gcc.dg/vect/slp-multitypes-4.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
>    gcc.dg/vect/slp-multitypes-4.c scan-tree-dump-times vect "vectorized 1 loops" 1
>    gcc.dg/vect/slp-multitypes-4.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1
>    gcc.dg/vect/slp-multitypes-5.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorized 1 loops" 1
>    gcc.dg/vect/slp-multitypes-5.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
>    gcc.dg/vect/slp-multitypes-5.c scan-tree-dump-times vect "vectorized 1 loops" 1
>    gcc.dg/vect/slp-multitypes-5.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1

These look like weaknesses in SLP trying to build a widening add by a
constant in the wider mode (i.e. (short) w+ (int)). I'd like to understand
what the issue is here.

>    gcc.dg/vect/slp-reduc-3.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
>    gcc.dg/vect/slp-reduc-3.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1

Is this one as simple as setting
check_effective_target_vect_widen_sum_hi_to_si_pattern in
testsuite/lib/target-supports.exp ?

>    gcc.dg/vect/vect-125.c -flto -ffat-lto-objects  scan-tree-dump vect "vectorized 1 loops"
>    gcc.dg/vect/vect-125.c scan-tree-dump vect "vectorized 1 loops"

These look like a failure to match the widening optab for operand types
(int) w+ (short) -> (int), so I'm also concerned here.

> I would like to treat these as saperate bugs and resolve them separately.

I think we shouldn't write these off without at least partially understanding
the issues.

> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index 9777418..d6c5d61 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -2636,6 +2636,60 @@
> 
>   ;; <su><addsub>w<q>.
> 
> +(define_expand "widen_ssum<mode>3"
> +  [(set (match_operand:<VDBLW> 0 "register_operand" "")
> +    (plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1
> "register_operand" ""))

Please check your mail settings, as the patch currently does not apply.
The last time I saw this issue it was a mail client turning on
format=flowed which caused carnage for patch files

( https://gcc.gnu.org/ml/gcc-patches/2015-08/msg00179.html )

> +              (match_operand:<VDBLW> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +  {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
> +
> +    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
> +                        operands[1], p));

Replace 8 spaces with a tab (mail client?).

Thanks,
James

> +    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
> +    DONE;
> +  }
> +)
> +
> +(define_expand "widen_ssum<mode>3"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
> +    (plus:<VWIDE> (sign_extend:<VWIDE>
> +               (match_operand:VD_BHSI 1 "register_operand" ""))
> +              (match_operand:<VWIDE> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +{
> +  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2],
> operands[1]));
> +  DONE;
> +})
> +
> +(define_expand "widen_usum<mode>3"
> +  [(set (match_operand:<VDBLW> 0 "register_operand" "=&w")
> +    (plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1
> "register_operand" ""))
> +              (match_operand:<VDBLW> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +  {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
> +    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
> +
> +    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
> +                         operands[1], p));
> +    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
> +    DONE;
> +  }
> +)
> +
> +(define_expand "widen_usum<mode>3"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "")
> +    (plus:<VWIDE> (zero_extend:<VWIDE>
> +               (match_operand:VD_BHSI 1 "register_operand" ""))
> +              (match_operand:<VWIDE> 2 "register_operand" "")))]
> +  "TARGET_SIMD"
> +{
> +  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2],
> operands[1]));
> +  DONE;
> +})
> +
>   (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
>     [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>           (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
> @@ -2646,6 +2700,18 @@
>     [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
>   )
> 
> +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> +        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
> +            (ANY_EXTEND:<VWIDE>
> +              (vec_select:<VHALF>
> +               (match_operand:VQW 2 "register_operand" "w")
> +               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
> +  "TARGET_SIMD"
> +  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>,
> %2.<Vhalftype>"
> +  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
> +)
> +
>   (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
>     [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>           (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md
> index b8a45d1..cd2914e 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -427,6 +427,13 @@
>                (V2DI "DI")    (V2SF  "SF")
>                (V4SF "V2SF")  (V2DF  "DF")])
> 
> +;; Half modes of all vector modes, in lower-case.
> +(define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
> +             (V4HI "v2hi")  (V8HI  "v4hi")
> +             (V2SI "si")    (V4SI  "v2si")
> +             (V2DI "di")    (V2SF  "sf")
> +             (V4SF "v2sf")  (V2DF  "df")])
> +
>   ;; Double modes of vector modes.
>   (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
>               (V2SI "V4SI")  (V2SF "V4SF")
> @@ -439,6 +446,11 @@
>               (SI   "v2si")  (DI   "v2di")
>               (DF   "v2df")])
> 
> +;; Modes with double-width elements.
> +(define_mode_attr VDBLW [(V8QI "V4HI") (V16QI "V8HI")
> +                  (V4HI "V2SI") (V8HI "V4SI")
> +                  (V2SI "DI")   (V4SI "V2DI")])
> +
>   ;; Narrowed modes for VDN.
>   (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
>                   (DI   "V2SI")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c
> b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
> new file mode 100644
> index 0000000..9db5d00
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +int
> +t6(int len, void * dummy, short * __restrict x)
> +{
> +  len = len & ~31;
> +  int result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "saddw" } } */
> +/* { dg-final { scan-assembler "saddw2" } } */
> +
> +
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c
> b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
> new file mode 100644
> index 0000000..6f8c8fd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int
> +t6(int len, void * dummy, int * __restrict x)
> +{
> +  len = len & ~31;
> +  long long result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "saddw" } } */
> +/* { dg-final { scan-assembler "saddw2" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
> b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
> new file mode 100644
> index 0000000..e34574f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +int
> +t6(int len, void * dummy, unsigned short * __restrict x)
> +{
> +  len = len & ~31;
> +  unsigned int result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "uaddw" } } */
> +/* { dg-final { scan-assembler "uaddw2" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
> b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
> new file mode 100644
> index 0000000..fd3b578
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int
> +t6(int len, void * dummy, unsigned short * __restrict x)
> +{
> +  len = len & ~31;
> +  unsigned int result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "uaddw" } } */
> +/* { dg-final { scan-assembler "uaddw2" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
> b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
> new file mode 100644
> index 0000000..04bc7c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +int
> +t6(int len, void * dummy, char * __restrict x)
> +{
> +  len = len & ~31;
> +  unsigned short result = 0;
> +  __asm volatile ("");
> +  for (int i = 0; i < len; i++)
> +    result += x[i];
> +  return result;
> +}
> +
> +/* { dg-final { scan-assembler "uaddw" } } */
> +/* { dg-final { scan-assembler "uaddw2" } } */
> +
> +
> +
> --
> 1.9.1
> 
> 
> 
> --
> Michael Collison
> Linaro Toolchain Working Group
> michael.collison@linaro.org
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Aarch64] Use vector wide add for mixed-mode adds
@ 2015-09-07  8:35 Michael Collison
  2015-09-17 15:52 ` James Greenhalgh
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Collison @ 2015-09-07  8:35 UTC (permalink / raw)
  To: GCC Patches

This patch is designed to address code that was not being vectorized due 
to missing widening patterns in the aarch64 backend. Code such as:

int t6(int len, void * dummy, short * __restrict x)
{
   len = len & ~31;
   int result = 0;
   __asm volatile ("");
   for (int i = 0; i < len; i++)
     result += x[i];
   return result;
}

Validated on aarch64-none-elf, aarch64_be-none-elf, and 
aarch64-none-linux-gnu.

Note that there are three non-execution tree dump vectorization 
regressions where previously code was being vectorized.  They are:

Passed now fails          [PASS => FAIL]:
   gcc.dg/vect/slp-multitypes-4.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorized 1 loops" 1
   gcc.dg/vect/slp-multitypes-4.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/slp-multitypes-4.c scan-tree-dump-times vect "vectorized 1 loops" 1
   gcc.dg/vect/slp-multitypes-4.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/slp-multitypes-5.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorized 1 loops" 1
   gcc.dg/vect/slp-multitypes-5.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/slp-multitypes-5.c scan-tree-dump-times vect "vectorized 1 loops" 1
   gcc.dg/vect/slp-multitypes-5.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/slp-reduc-3.c -flto -ffat-lto-objects  scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/slp-reduc-3.c scan-tree-dump-times vect "vectorizing stmts using SLP" 1
   gcc.dg/vect/vect-125.c -flto -ffat-lto-objects  scan-tree-dump vect "vectorized 1 loops"
   gcc.dg/vect/vect-125.c scan-tree-dump vect "vectorized 1 loops"

I would like to treat these as separate bugs and resolve them separately.


--------------------------------------------------------------------------------------------------------------------------------------------------------

2015-09-04  Michael Collison  <Michael.Collison@linaro.org>

     * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): New patterns
     * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
     * gcc.target/aarch64/saddw-1.c: New test.
     * gcc.target/aarch64/saddw-2.c: New test.
     * gcc.target/aarch64/uaddw-1.c: New test.
     * gcc.target/aarch64/uaddw-2.c: New test.
     * gcc.target/aarch64/uaddw-3.c: New test.

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 9777418..d6c5d61 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2636,6 +2636,60 @@

  ;; <su><addsub>w<q>.

+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+    (plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1 
"register_operand" ""))
+              (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
+                        operands[1], p));
+    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+    (plus:<VWIDE> (sign_extend:<VWIDE>
+               (match_operand:VD_BHSI 1 "register_operand" ""))
+              (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], 
operands[1]));
+  DONE;
+})
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "=&w")
+    (plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1 
"register_operand" ""))
+              (match_operand:<VDBLW> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
+
+    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
+                         operands[1], p));
+    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+    (plus:<VWIDE> (zero_extend:<VWIDE>
+               (match_operand:VD_BHSI 1 "register_operand" ""))
+              (match_operand:<VWIDE> 2 "register_operand" "")))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], 
operands[1]));
+  DONE;
+})
+
  (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
@@ -2646,6 +2700,18 @@
    [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
  )

+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+            (ANY_EXTEND:<VWIDE>
+              (vec_select:<VHALF>
+               (match_operand:VQW 2 "register_operand" "w")
+               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, 
%2.<Vhalftype>"
+  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+)
+
  (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/iterators.md 
b/gcc/config/aarch64/iterators.md
index b8a45d1..cd2914e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -427,6 +427,13 @@
               (V2DI "DI")    (V2SF  "SF")
               (V4SF "V2SF")  (V2DF  "DF")])

+;; Half modes of all vector modes, in lower-case.
+(define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
+             (V4HI "v2hi")  (V8HI  "v4hi")
+             (V2SI "si")    (V4SI  "v2si")
+             (V2DI "di")    (V2SF  "sf")
+             (V4SF "v2sf")  (V2DF  "df")])
+
  ;; Double modes of vector modes.
  (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
              (V2SI "V4SI")  (V2SF "V4SF")
@@ -439,6 +446,11 @@
              (SI   "v2si")  (DI   "v2di")
              (DF   "v2df")])

+;; Modes with double-width elements.
+(define_mode_attr VDBLW [(V8QI "V4HI") (V16QI "V8HI")
+                  (V4HI "V2SI") (V8HI "V4SI")
+                  (V2SI "DI")   (V4SI "V2DI")])
+
  ;; Narrowed modes for VDN.
  (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
                  (DI   "V2SI")])
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c 
b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
new file mode 100644
index 0000000..9db5d00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;
+  int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
+
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c 
b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
new file mode 100644
index 0000000..6f8c8fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;
+  long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "saddw" } } */
+/* { dg-final { scan-assembler "saddw2" } } */
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c 
b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
new file mode 100644
index 0000000..e34574f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c 
b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
new file mode 100644
index 0000000..fd3b578
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c 
b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
new file mode 100644
index 0000000..04bc7c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, char * __restrict x)
+{
+  len = len & ~31;
+  unsigned short result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "uaddw" } } */
+/* { dg-final { scan-assembler "uaddw2" } } */
+
+
+
-- 
1.9.1



-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-11-24 10:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-09  6:52 [Aarch64] Use vector wide add for mixed-mode adds Michael Collison
2015-11-22 16:13 ` James Greenhalgh
2015-11-23  2:46   ` Michael Collison
2015-11-23  9:21     ` James Greenhalgh
  -- strict thread matches above, loose matches on Subject: below --
2015-11-24  9:36 Michael Collison
2015-11-24 10:58 ` James Greenhalgh
2015-09-07  8:35 Michael Collison
2015-09-17 15:52 ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).