public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [ARM] Fix widen-sum pattern in neon.md.
@ 2015-03-05 13:35 Xingxing Pan
  2015-03-05 13:55 ` Kyrill Tkachov
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Xingxing Pan @ 2015-03-05 13:35 UTC (permalink / raw)
  To: julian, James Greenhalgh, Kyrill Tkachov, Ramana Radhakrishnan,
	Richard Earnshaw, nickc, Xinyu Qi, Liping Gao, joey.ye,
	gcc-patches

[-- Attachment #1: Type: text/plain, Size: 351 bytes --]

Hi,

Expansion of the widen-sum pattern always fails. The vectorizer expects
the operands to have the same size, but the current implementation of
the widen-sum pattern does not conform to this.

This patch implements the widen-sum pattern with vpadal, makes the
vaddw patterns anonymous, and adds widen-sum test cases for NEON.

-- 
Regards,
Xingxing

[-- Attachment #2: fix-widen-sum.patch --]
[-- Type: text/x-patch, Size: 17410 bytes --]

commit 62637f371a3329ff56644526bc5dbf9356cbdd6c
Author: Xingxing Pan <xxingpan@marvell.com>
Date:   Wed Feb 25 16:44:25 2015 +0800

    Fix widen-sum pattern in neon.md.

    2015-03-05  Xingxing Pan  <xxingpan@marvell.com>
    
        config/arm/
        * iterators.md:
        (VWSD): New define_mode_iterator.
        (V_widen_sum_d): New define_mode_attr.
        * neon.md
        (widen_ssum<mode>3): Redefined.
        (widen_usum<mode>3): Ditto.
        (neon_svaddw<mode>3): New anonymous define_insn.
        (neon_uvaddw<mode>3): Ditto.
        testsuite/gcc.target/arm/neon/
        * vect-widen-sum-char2short-s-d.c: New file.
        * vect-widen-sum-char2short-s.c: Ditto.
        * vect-widen-sum-char2short-u-d.c: Ditto.
        * vect-widen-sum-char2short-u.c: Ditto.
        * vect-widen-sum-short2int-s-d.c: Ditto.
        * vect-widen-sum-short2int-s.c: Ditto.
        * vect-widen-sum-short2int-u-d.c: Ditto.
        * vect-widen-sum-short2int-u.c: Ditto.
        testsuite/lib/
        * target-supports.exp:
        (check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON.
        (check_effective_target_vect_widen_sum_hi_to_si): Ditto.
        (check_effective_target_vect_widen_sum_qi_to_hi): Ditto.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f7f8ab7..4ba5901 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -95,6 +95,9 @@
 ;; Widenable modes.
 (define_mode_iterator VW [V8QI V4HI V2SI])
 
+;; Widenable modes. Used by widen sum.
+(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI])
+
 ;; Narrowable modes.
 (define_mode_iterator VN [V8HI V4SI V2DI])
 
@@ -558,6 +561,11 @@
 ;; Widen. Result is half the number of elements, but widened to double-width.
 (define_mode_attr V_unpack   [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
 
+;; Widen. Result is half the number of elements, but widened to double-width.
+;; Used by widen sum.
+(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI")
+                                 (V16QI "V8HI") (V8HI "V4SI")])
+
 ;; Conditions to be used in extend<mode>di patterns.
 (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
 (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327e..6cac36d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,7 +1174,31 @@
 
 ;; Widening operations
 
-(define_insn "widen_ssum<mode>3"
+(define_expand "widen_usum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_move_insn(operands[0], operands[2]);
+    emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[0], operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_ssum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_move_insn(operands[0], operands[2]);
+    emit_insn (gen_neon_vpadals<mode> (operands[0], operands[0], operands[1]));
+    DONE;
+  }
+)
+
+(define_insn "*neon_svaddw<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
 	(plus:<V_widen> (sign_extend:<V_widen>
 			  (match_operand:VW 1 "s_register_operand" "%w"))
@@ -1184,7 +1208,7 @@
   [(set_attr "type" "neon_add_widen")]
 )
 
-(define_insn "widen_usum<mode>3"
+(define_insn "*neon_uvaddw<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
 	(plus:<V_widen> (zero_extend:<V_widen>
 			  (match_operand:VW 1 "s_register_operand" "%w"))
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
new file mode 100644
index 0000000..c81c325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widenning sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
new file mode 100644
index 0000000..de53f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widenning sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
new file mode 100644
index 0000000..bfa17d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
new file mode 100644
index 0000000..38d8179
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
new file mode 100644
index 0000000..7aef5d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
new file mode 100644
index 0000000..914ad09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
new file mode 100644
index 0000000..6f4a29b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
new file mode 100644
index 0000000..194b260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6b957de..eaccb83 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
     } else {
         set et_vect_widen_sum_hi_to_si_pattern_saved 0
         if { [istarget powerpc*-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
              || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_pattern_saved 1
         }
@@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
     } else {
         set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
         if { [istarget powerpc*-*-*] 
-	     || [istarget ia64-*-*] } {
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+             || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_saved 1
         }
     }
@@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
     } else {
         set et_vect_widen_sum_qi_to_hi_saved 0
 	if { [check_effective_target_vect_unpack] 
-	     || [check_effective_target_arm_neon_ok]
+	     || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
 	     || [istarget ia64-*-*] } {
             set et_vect_widen_sum_qi_to_hi_saved 1
 	}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] [ARM] Fix widen-sum pattern in neon.md.
  2015-03-05 13:35 [PATCH] [ARM] Fix widen-sum pattern in neon.md Xingxing Pan
@ 2015-03-05 13:55 ` Kyrill Tkachov
  2015-03-05 14:16 ` James Greenhalgh
  2015-04-14 19:13 ` Ramana Radhakrishnan
  2 siblings, 0 replies; 5+ messages in thread
From: Kyrill Tkachov @ 2015-03-05 13:55 UTC (permalink / raw)
  To: Xingxing Pan, julian, James Greenhalgh, Ramana Radhakrishnan,
	Richard Earnshaw, nickc, Xinyu Qi, Liping Gao, Joey Ye,
	gcc-patches


On 05/03/15 13:34, Xingxing Pan wrote:
> Hi,
Hi Xingxing,
Thanks for improving this! Some comments inline.

>
> The expanding of widen-sum pattern always fails. The vectorizer expects
> the operands to have the same size, while the current implementation of
> widen-sum pattern dose not conform to this.
>
> This patch implements the widen-sum pattern with vpadal. Change the
> vaddw pattern to anonymous. Add widen-sum test cases for neon.

How has this been tested? Bootstrap and testsuite?

> -- Regards, Xingxing
>
> fix-widen-sum.patch
>
>
> commit 62637f371a3329ff56644526bc5dbf9356cbdd6c
> Author: Xingxing Pan<xxingpan@marvell.com>
> Date:   Wed Feb 25 16:44:25 2015 +0800
>
>      Fix widen-sum pattern in neon.md.
>
>      2015-03-05  Xingxing Pan<xxingpan@marvell.com>
>      
>          config/arm/
>          * iterators.md:
>          (VWSD): New define_mode_iterator.
>          (V_widen_sum_d): New define_mode_attr.
>          * neon.md
>          (widen_ssum<mode>3): Redefined.
>          (widen_usum<mode>3): Ditto.
>          (neon_svaddw<mode>3): New anonymous define_insn.
>          (neon_uvaddw<mode>3): Ditto.

Please use proper ChangeLog format:
     * config/arm/iterators.md (VWSD): New.

and so on. Separate ChangeLog for the testsuite.

>          testsuite/gcc.target/arm/neon/
>          * vect-widen-sum-char2short-s-d.c: New file.
>          * vect-widen-sum-char2short-s.c: Ditto.
>          * vect-widen-sum-char2short-u-d.c: Ditto.
>          * vect-widen-sum-char2short-u.c: Ditto.
>          * vect-widen-sum-short2int-s-d.c: Ditto.
>          * vect-widen-sum-short2int-s.c: Ditto.
>          * vect-widen-sum-short2int-u-d.c: Ditto.
>          * vect-widen-sum-short2int-u.c: Ditto.
>          testsuite/lib/
>          * target-supports.exp:
>          (check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON.
>          (check_effective_target_vect_widen_sum_hi_to_si): Ditto.
>          (check_effective_target_vect_widen_sum_qi_to_hi): Ditto.
>
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index f7f8ab7..4ba5901 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -95,6 +95,9 @@
>   ;; Widenable modes.
>   (define_mode_iterator VW [V8QI V4HI V2SI])
>   
> +;; Widenable modes. Used by widen sum.
> +(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI])

Two spaces after full stop in comment.

> +
>   ;; Narrowable modes.
>   (define_mode_iterator VN [V8HI V4SI V2DI])
>   
> @@ -558,6 +561,11 @@
>   ;; Widen. Result is half the number of elements, but widened to double-width.
>   (define_mode_attr V_unpack   [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
>   
> +;; Widen. Result is half the number of elements, but widened to double-width.
> +;; Used by widen sum.
Likewise.

> +(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI")
> +                                 (V16QI "V8HI") (V8HI "V4SI")])
> +
>   ;; Conditions to be used in extend<mode>di patterns.
>   (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
>   (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 63c327e..6cac36d 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1174,7 +1174,31 @@
>   
>   ;; Widening operations
>   
> -(define_insn "widen_ssum<mode>3"
> +(define_expand "widen_usum<mode>3"
> + [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
> +  (match_operand:VWSD 1 "s_register_operand" "")
> +  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
> +  "TARGET_NEON"
> +  {
> +    emit_move_insn(operands[0], operands[2]);
> +    emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[0], operands[1]));
> +    DONE;
> +  }
> +)

Is the move from operands[2] to operands[0] necessary?
Can you take advantage of the fact that neon_vpadal<sup><mode> has
"0" in its constraint, thus making register allocation tie the operands
to the same register?

> +
> +(define_expand "widen_ssum<mode>3"
> + [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
> +  (match_operand:VWSD 1 "s_register_operand" "")
> +  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
> +  "TARGET_NEON"
> +  {
> +    emit_move_insn(operands[0], operands[2]);
> +    emit_insn (gen_neon_vpadals<mode> (operands[0], operands[0], operands[1]));
> +    DONE;
> +  }
> +)
> +
> +(define_insn "*neon_svaddw<mode>3"
>     [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
>   	(plus:<V_widen> (sign_extend:<V_widen>
>   			  (match_operand:VW 1 "s_register_operand" "%w"))
> @@ -1184,7 +1208,7 @@
>     [(set_attr "type" "neon_add_widen")]
>   )
>   
> -(define_insn "widen_usum<mode>3"
> +(define_insn "*neon_uvaddw<mode>3"
>     [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
>   	(plus:<V_widen> (zero_extend:<V_widen>
>   			  (match_operand:VW 1 "s_register_operand" "%w"))
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
> new file mode 100644
> index 0000000..c81c325
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
> @@ -0,0 +1,64 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>

Do you need this stdlib.h include? If not, please remove it; we try to
avoid including system headers in tests if we can. To get a declaration
of abort () you can just declare extern void abort (void); at the top
of the file. Same in the other tests.

Cheers,
Kyrill


> +
> +typedef signed char STYPE1;
> +typedef signed short STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  /* widenning sum: sum chars into short.
> +
> +     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
> +     is currently not detected because of this patch:
> +
> +     2005-12-26  Kazu Hirata<kazu@codesourcery.com>
> +        PR tree-optimization/25125
> +   */
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
> new file mode 100644
> index 0000000..de53f5c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
> @@ -0,0 +1,64 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed char STYPE1;
> +typedef signed short STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  /* widenning sum: sum chars into short.
> +
> +     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
> +     is currently not detected because of this patch:
> +
> +     2005-12-26  Kazu Hirata<kazu@codesourcery.com>
> +        PR tree-optimization/25125
> +   */
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
> new file mode 100644
> index 0000000..bfa17d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned char UTYPE1;
> +typedef unsigned short UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
> new file mode 100644
> index 0000000..38d8179
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned char UTYPE1;
> +typedef unsigned short UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
> new file mode 100644
> index 0000000..7aef5d1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed short STYPE1;
> +typedef signed int STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
> new file mode 100644
> index 0000000..914ad09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed short STYPE1;
> +typedef signed int STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
> new file mode 100644
> index 0000000..6f4a29b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned short UTYPE1;
> +typedef unsigned int UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
> new file mode 100644
> index 0000000..194b260
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned short UTYPE1;
> +typedef unsigned int UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 6b957de..eaccb83 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
>       } else {
>           set et_vect_widen_sum_hi_to_si_pattern_saved 0
>           if { [istarget powerpc*-*-*]
> +             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>                || [istarget ia64-*-*] } {
>               set et_vect_widen_sum_hi_to_si_pattern_saved 1
>           }
> @@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
>       } else {
>           set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
>           if { [istarget powerpc*-*-*]
> -	     || [istarget ia64-*-*] } {
> +             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
> +             || [istarget ia64-*-*] } {
>               set et_vect_widen_sum_hi_to_si_saved 1
>           }
>       }
> @@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
>       } else {
>           set et_vect_widen_sum_qi_to_hi_saved 0
>   	if { [check_effective_target_vect_unpack]
> -	     || [check_effective_target_arm_neon_ok]
> +	     || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>   	     || [istarget ia64-*-*] } {
>               set et_vect_widen_sum_qi_to_hi_saved 1
>   	}


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] [ARM] Fix widen-sum pattern in neon.md.
  2015-03-05 13:35 [PATCH] [ARM] Fix widen-sum pattern in neon.md Xingxing Pan
  2015-03-05 13:55 ` Kyrill Tkachov
@ 2015-03-05 14:16 ` James Greenhalgh
  2015-04-14 19:13 ` Ramana Radhakrishnan
  2 siblings, 0 replies; 5+ messages in thread
From: James Greenhalgh @ 2015-03-05 14:16 UTC (permalink / raw)
  To: Xingxing Pan
  Cc: julian, Kyrylo Tkachov, Ramana Radhakrishnan, Richard Earnshaw,
	nickc, Xinyu Qi, Liping Gao, Joey Ye, gcc-patches

Hi Xingxing,

I'm a little confused by your reasons for adding testcases marked XFAIL.

On Thu, Mar 05, 2015 at 01:34:25PM +0000, Xingxing Pan wrote:
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */

Why XFAIL here? Maybe I have not properly understood what you are checking
for; can this not be rewritten into something we expect to PASS?

If you are testing that the pattern doesn't get recognized, use:

   { dg-final { scan-tree-dump-not "pattern recognized.*w\\\+" "vect" } }

Or is the reason that the pattern should be recognised in future but
currently is not? 

In any case, a comment on why these tests should be expected to fail
would be useful, even if that just means moving the comment you already
have in the testcase up beside these dg-directives.

Thanks,
James

> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed char STYPE1;
> +typedef signed short STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  /* widenning sum: sum chars into short.
> +
> +     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
> +     is currently not detected because of this patch:
> +
> +     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
> +        PR tree-optimization/25125
> +   */
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
> new file mode 100644
> index 0000000..de53f5c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
> @@ -0,0 +1,64 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed char STYPE1;
> +typedef signed short STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  /* widenning sum: sum chars into short.
> +
> +     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
> +     is currently not detected because of this patch:
> +
> +     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
> +        PR tree-optimization/25125
> +   */
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
> new file mode 100644
> index 0000000..bfa17d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned char UTYPE1;
> +typedef unsigned short UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
> new file mode 100644
> index 0000000..38d8179
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned char UTYPE1;
> +typedef unsigned short UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
> new file mode 100644
> index 0000000..7aef5d1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed short STYPE1;
> +typedef signed int STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
> new file mode 100644
> index 0000000..914ad09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef signed short STYPE1;
> +typedef signed int STYPE2;
> +
> +#define N 128
> +STYPE1 sdata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +ssum ()
> +{
> +  int i;
> +  STYPE2 sum = 0;
> +  STYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      sdata[i] = i*2;
> +      check_sum += sdata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += sdata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  ssum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
> new file mode 100644
> index 0000000..6f4a29b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned short UTYPE1;
> +typedef unsigned int UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
> new file mode 100644
> index 0000000..194b260
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
> +/* { dg-add-options arm_neon } */
> +
> +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> +
> +#include <stdlib.h>
> +
> +typedef unsigned short UTYPE1;
> +typedef unsigned int UTYPE2;
> +
> +#define N 128
> +UTYPE1 udata[N];
> +
> +volatile int y = 0;
> +
> +__attribute__ ((noinline)) int
> +usum ()
> +{
> +  int i;
> +  UTYPE2 sum = 0;
> +  UTYPE2 check_sum = 0;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      udata[i] = i*2;
> +      check_sum += udata[i];
> +      /* Avoid vectorization.  */
> +      if (y)
> +	abort ();
> +    }
> +
> +  /* widenning sum: sum chars into int.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      sum += udata[i];
> +    }
> +
> +  /* check results:  */
> +  if (sum != check_sum)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  usum ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 6b957de..eaccb83 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
>      } else {
>          set et_vect_widen_sum_hi_to_si_pattern_saved 0
>          if { [istarget powerpc*-*-*]
> +             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>               || [istarget ia64-*-*] } {
>              set et_vect_widen_sum_hi_to_si_pattern_saved 1
>          }
> @@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
>      } else {
>          set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
>          if { [istarget powerpc*-*-*] 
> -	     || [istarget ia64-*-*] } {
> +             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
> +             || [istarget ia64-*-*] } {
>              set et_vect_widen_sum_hi_to_si_saved 1
>          }
>      }
> @@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
>      } else {
>          set et_vect_widen_sum_qi_to_hi_saved 0
>  	if { [check_effective_target_vect_unpack] 
> -	     || [check_effective_target_arm_neon_ok]
> +	     || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>  	     || [istarget ia64-*-*] } {
>              set et_vect_widen_sum_qi_to_hi_saved 1
>  	}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] [ARM] Fix widen-sum pattern in neon.md.
  2015-03-05 13:35 [PATCH] [ARM] Fix widen-sum pattern in neon.md Xingxing Pan
  2015-03-05 13:55 ` Kyrill Tkachov
  2015-03-05 14:16 ` James Greenhalgh
@ 2015-04-14 19:13 ` Ramana Radhakrishnan
  2015-04-20  6:05   ` Xingxing Pan
  2 siblings, 1 reply; 5+ messages in thread
From: Ramana Radhakrishnan @ 2015-04-14 19:13 UTC (permalink / raw)
  To: Xingxing Pan
  Cc: Julian Brown, James Greenhalgh, Kyrill Tkachov,
	Ramana Radhakrishnan, Richard Earnshaw, nickc, Xinyu Qi,
	Liping Gao, Joey Ye, gcc-patches

On Thu, Mar 5, 2015 at 1:34 PM, Xingxing Pan <xxingpan@marvell.com> wrote:
> Hi,
>
> The expanding of widen-sum pattern always fails. The vectorizer expects the
> operands to have the same size, while the current implementation of
> widen-sum pattern does not conform to this.
>
> This patch implements the widen-sum pattern with vpadal. Change the vaddw
> pattern to anonymous. Add widen-sum test cases for neon.
>

Can you please respin addressing James and Kyrill's comments ?


Ramana

> --
> Regards,
> Xingxing

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] [ARM] Fix widen-sum pattern in neon.md.
  2015-04-14 19:13 ` Ramana Radhakrishnan
@ 2015-04-20  6:05   ` Xingxing Pan
  0 siblings, 0 replies; 5+ messages in thread
From: Xingxing Pan @ 2015-04-20  6:05 UTC (permalink / raw)
  To: ramrad01
  Cc: Julian Brown, James Greenhalgh, Kyrill Tkachov,
	Ramana Radhakrishnan, Richard Earnshaw, nickc, Xinyu Qi,
	Liping Gao, Joey Ye, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2070 bytes --]

On 04/15/2015 03:13 AM, Ramana Radhakrishnan wrote:
> On Thu, Mar 5, 2015 at 1:34 PM, Xingxing Pan <xxingpan@marvell.com> wrote:
>> Hi,
>>
>> The expanding of widen-sum pattern always fails. The vectorizer expects the
>> operands to have the same size, while the current implementation of
>> widen-sum pattern does not conform to this.
>>
>> This patch implements the widen-sum pattern with vpadal. Change the vaddw
>> pattern to anonymous. Add widen-sum test cases for neon.
>>
>
> Can you please respin addressing James and Kyrill's comments ?
>
>
> Ramana
>
>> --
>> Regards,
>> Xingxing

Hi,

Sorry for the late response.

The pattern has been rewritten to utilize neon_vpadal<sup><mode>'s "0"
constraints. I have run vect.exp and neon.exp on an ARMv7 board.

vect.exp has two new XFAILs:
XFAIL: gcc.dg/vect/slp-reduc-3.c scan-tree-dump-times vect "vectorizing 
stmts using SLP" 1
XFAIL: gcc.dg/vect/slp-reduc-3.c -flto -ffat-lto-objects 
scan-tree-dump-times vect "vectorizing stmts using SLP" 1

This is because the widen-sum optimization precedes SLP. The xfail predicate
vect_widen_sum_hi_to_si becomes true when widen-sum is enabled.

neon.exp has four new XFAILs:
XFAIL: gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c 
scan-tree-dump-times vect "pattern recognized.*w\\+" 1
XFAIL: gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c 
scan-rtl-dump-times expand "UNSPEC_VPADAL" 1
XFAIL: gcc.target/arm/neon/vect-widen-sum-char2short-s.c 
scan-tree-dump-times vect "pattern recognized.*w\\+" 1
XFAIL: gcc.target/arm/neon/vect-widen-sum-char2short-s.c 
scan-rtl-dump-times expand "UNSPEC_VPADAL" 1

If the widen-sum pattern is successfully expanded, "w+" and
"UNSPEC_VPADAL" should appear in the dump file, as they do for the other
vect-widen-sum-*.c tests. But vect-widen-sum-char2short-s[-d].c is
special because at tree level the signed operations are converted
into unsigned operations, which destroys the widen-sum pattern. That is
due to the workaround for PR tree-optimization/25125. I have just added
the xfail, following gcc.dg/vect/vect-reduc-pattern-2c.c.


-- 
Regards,
Xingxing

[-- Attachment #2: fix-widen-sum.patch --]
[-- Type: text/x-patch, Size: 17736 bytes --]

commit c44b5bd19efb029b8bbd4e3c7e2d631bdc482b7c
Author: Xingxing Pan <xxingpan@marvell.com>
Date:   Sun Apr 19 15:54:43 2015 +0800

    Fix widen-sum pattern in neon.md.
    
    gcc/
    
    2015-04-19  Xingxing Pan  <xxingpan@marvell.com>
    
        * config/arm/iterators.md (VWSD): New.
          (V_widen_sum_d): New.
        * config/arm/neon.md (widen_ssum<mode>3): Redefined.
        (widen_usum<mode>3): Ditto.
        (neon_svaddw<mode>3): New anonymous define_insn.
        (neon_uvaddw<mode>3): Ditto.
    
    gcc/testsuite/
    
    2015-04-19  Xingxing Pan  <xxingpan@marvell.com>
    
        * gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c: New.
        * gcc.target/arm/neon/vect-widen-sum-char2short-s.c: New.
        * gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c: New.
        * gcc.target/arm/neon/vect-widen-sum-char2short-u.c: New.
        * gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c: New.
        * gcc.target/arm/neon/vect-widen-sum-short2int-s.c: New.
        * gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c: New.
        * gcc.target/arm/neon/vect-widen-sum-short2int-u.c: New.
        * lib/target-supports.exp
        (check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON.
        (check_effective_target_vect_widen_sum_hi_to_si): Ditto.
        (check_effective_target_vect_widen_sum_qi_to_hi): Ditto.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f7f8ab7..f73278d 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -95,6 +95,9 @@
 ;; Widenable modes.
 (define_mode_iterator VW [V8QI V4HI V2SI])
 
+;; Widenable modes.  Used by widen sum.
+(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI])
+
 ;; Narrowable modes.
 (define_mode_iterator VN [V8HI V4SI V2DI])
 
@@ -555,9 +558,14 @@
 ;; Same as V_widen, but lower-case.
 (define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")])
 
-;; Widen. Result is half the number of elements, but widened to double-width.
+;; Widen.  Result is half the number of elements, but widened to double-width.
 (define_mode_attr V_unpack   [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
 
+;; Widen.  Result is half the number of elements, but widened to double-width.
+;; Used by widen sum.
+(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI")
+                                 (V16QI "V8HI") (V8HI "V4SI")])
+
 ;; Conditions to be used in extend<mode>di patterns.
 (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
 (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327e..839883f 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,7 +1174,29 @@
 
 ;; Widening operations
 
-(define_insn "widen_ssum<mode>3"
+(define_expand "widen_usum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[2], operands[1]));
+    DONE;
+  }
+)
+
+(define_expand "widen_ssum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_insn (gen_neon_vpadals<mode> (operands[0], operands[2], operands[1]));
+    DONE;
+  }
+)
+
+(define_insn "*neon_svaddw<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
 	(plus:<V_widen> (sign_extend:<V_widen>
 			  (match_operand:VW 1 "s_register_operand" "%w"))
@@ -1184,7 +1206,7 @@
   [(set_attr "type" "neon_add_widen")]
 )
 
-(define_insn "widen_usum<mode>3"
+(define_insn "*neon_uvaddw<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
 	(plus:<V_widen> (zero_extend:<V_widen>
 			  (match_operand:VW 1 "s_register_operand" "%w"))
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
new file mode 100644
index 0000000..8d0278c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
@@ -0,0 +1,63 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+extern void abort (void);
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widening sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum chars into short.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
new file mode 100644
index 0000000..f7384c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
@@ -0,0 +1,63 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+extern void abort (void);
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widening sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata  <kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum chars into short.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
new file mode 100644
index 0000000..35f8fa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+extern void abort (void);
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum chars into short.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
new file mode 100644
index 0000000..38af5f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+extern void abort (void);
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum chars into short.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
new file mode 100644
index 0000000..ef765de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+extern void abort (void);
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum shorts into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
new file mode 100644
index 0000000..fb38d56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+extern void abort (void);
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum shorts into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
new file mode 100644
index 0000000..5a3dfd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+extern void abort (void);
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum shorts into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
new file mode 100644
index 0000000..770b08d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+extern void abort (void);
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+        abort ();
+    }
+
+  /* widening sum: sum shorts into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index f632d00..477ab53 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3795,6 +3795,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
     } else {
         set et_vect_widen_sum_hi_to_si_pattern_saved 0
         if { [istarget powerpc*-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
              || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_pattern_saved 1
         }
@@ -3818,7 +3819,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
     } else {
         set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
         if { [istarget powerpc*-*-*] 
-	     || [istarget ia64-*-*] } {
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+             || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_saved 1
         }
     }
@@ -3841,7 +3843,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
     } else {
         set et_vect_widen_sum_qi_to_hi_saved 0
 	if { [check_effective_target_vect_unpack] 
-	     || [check_effective_target_arm_neon_ok]
+	     || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
 	     || [istarget ia64-*-*] } {
             set et_vect_widen_sum_qi_to_hi_saved 1
 	}

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-04-20  6:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-05 13:35 [PATCH] [ARM] Fix widen-sum pattern in neon.md Xingxing Pan
2015-03-05 13:55 ` Kyrill Tkachov
2015-03-05 14:16 ` James Greenhalgh
2015-04-14 19:13 ` Ramana Radhakrishnan
2015-04-20  6:05   ` Xingxing Pan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).