commit 62637f371a3329ff56644526bc5dbf9356cbdd6c Author: Xingxing Pan Date: Wed Feb 25 16:44:25 2015 +0800 Fix widen-sum pattern in neon.md. 2015-03-05 Xingxing Pan config/arm/ * iterators.md: (VWSD): New define_mode_iterator. (V_widen_sum_d): New define_mode_attr. * neon.md (widen_ssum3): Redefined. (widen_usum3): Ditto. (neon_svaddw3): New anonymous define_insn. (neon_uvaddw3): Ditto. testsuite/gcc.target/arm/neon/ * vect-widen-sum-char2short-s-d.c: New file. * vect-widen-sum-char2short-s.c: Ditto. * vect-widen-sum-char2short-u-d.c: Ditto. * vect-widen-sum-char2short-u.c: Ditto. * vect-widen-sum-short2int-s-d.c: Ditto. * vect-widen-sum-short2int-s.c: Ditto. * vect-widen-sum-short2int-u-d.c: Ditto. * vect-widen-sum-short2int-u.c: Ditto. testsuite/lib/ * target-supports.exp: (check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON. (check_effective_target_vect_widen_sum_hi_to_si): Ditto. (check_effective_target_vect_widen_sum_qi_to_hi): Ditto. diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index f7f8ab7..4ba5901 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -95,6 +95,9 @@ ;; Widenable modes. (define_mode_iterator VW [V8QI V4HI V2SI]) +;; Widenable modes. Used by widen sum. +(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI]) + ;; Narrowable modes. (define_mode_iterator VN [V8HI V4SI V2DI]) @@ -558,6 +561,11 @@ ;; Widen. Result is half the number of elements, but widened to double-width. (define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) +;; Widen. Result is half the number of elements, but widened to double-width. +;; Used by widen sum. +(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI") + (V16QI "V8HI") (V8HI "V4SI")]) + ;; Conditions to be used in extenddi patterns. 
(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 63c327e..6cac36d 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1174,7 +1174,31 @@ ;; Widening operations -(define_insn "widen_ssum<mode>3" +(define_expand "widen_usum<mode>3" + [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "") + (match_operand:VWSD 1 "s_register_operand" "") + (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")] + "TARGET_NEON" + { + emit_move_insn(operands[0], operands[2]); + emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[0], operands[1])); + DONE; + } +) + +(define_expand "widen_ssum<mode>3" + [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "") + (match_operand:VWSD 1 "s_register_operand" "") + (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")] + "TARGET_NEON" + { + emit_move_insn(operands[0], operands[2]); + emit_insn (gen_neon_vpadals<mode> (operands[0], operands[0], operands[1])); + DONE; + } +) + +(define_insn "*neon_svaddw<mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (plus:<V_widen> (sign_extend:<V_widen> (match_operand:VW 1 "s_register_operand" "%w")) @@ -1184,7 +1208,7 @@ [(set_attr "type" "neon_add_widen")] ) -(define_insn "widen_usum<mode>3" +(define_insn "*neon_uvaddw<mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (plus:<V_widen> (zero_extend:<V_widen> (match_operand:VW 1 "s_register_operand" "%w")) diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c new file mode 100644 index 0000000..c81c325 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef signed char STYPE1; +typedef signed short STYPE2; + +#define N 128 +STYPE1 sdata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +ssum () +{ + int i; + STYPE2 sum = 0; + STYPE2 check_sum = 0; + + /* widening sum: sum chars into short. + + Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern + is currently not detected because of this patch: + + 2005-12-26 Kazu Hirata + PR tree-optimization/25125 + */ + + for (i = 0; i < N; i++) + { + sdata[i] = i*2; + check_sum += sdata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum chars into short. */ + for (i = 0; i < N; i++) + { + sum += sdata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + ssum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c new file mode 100644 index 0000000..de53f5c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef signed char STYPE1; +typedef signed short STYPE2; + +#define N 128 +STYPE1 sdata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +ssum () +{ + int i; 
+ STYPE2 sum = 0; + STYPE2 check_sum = 0; + + /* widening sum: sum chars into short. + + Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern + is currently not detected because of this patch: + + 2005-12-26 Kazu Hirata + PR tree-optimization/25125 + */ + + for (i = 0; i < N; i++) + { + sdata[i] = i*2; + check_sum += sdata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum chars into short. */ + for (i = 0; i < N; i++) + { + sum += sdata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + ssum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c new file mode 100644 index 0000000..bfa17d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef unsigned char UTYPE1; +typedef unsigned short UTYPE2; + +#define N 128 +UTYPE1 udata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +usum () +{ + int i; + UTYPE2 sum = 0; + UTYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + udata[i] = i*2; + check_sum += udata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum chars into short. 
*/ + for (i = 0; i < N; i++) + { + sum += udata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + usum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c new file mode 100644 index 0000000..38d8179 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef unsigned char UTYPE1; +typedef unsigned short UTYPE2; + +#define N 128 +UTYPE1 udata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +usum () +{ + int i; + UTYPE2 sum = 0; + UTYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + udata[i] = i*2; + check_sum += udata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum chars into short. 
*/ + for (i = 0; i < N; i++) + { + sum += udata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + usum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c new file mode 100644 index 0000000..7aef5d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef signed short STYPE1; +typedef signed int STYPE2; + +#define N 128 +STYPE1 sdata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +ssum () +{ + int i; + STYPE2 sum = 0; + STYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + sdata[i] = i*2; + check_sum += sdata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum shorts into int. 
*/ + for (i = 0; i < N; i++) + { + sum += sdata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + ssum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c new file mode 100644 index 0000000..914ad09 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef signed short STYPE1; +typedef signed int STYPE2; + +#define N 128 +STYPE1 sdata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +ssum () +{ + int i; + STYPE2 sum = 0; + STYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + sdata[i] = i*2; + check_sum += sdata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum shorts into int. 
*/ + for (i = 0; i < N; i++) + { + sum += sdata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + ssum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c new file mode 100644 index 0000000..6f4a29b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef unsigned short UTYPE1; +typedef unsigned int UTYPE2; + +#define N 128 +UTYPE1 udata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +usum () +{ + int i; + UTYPE2 sum = 0; + UTYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + udata[i] = i*2; + check_sum += udata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum shorts into int. 
*/ + for (i = 0; i < N; i++) + { + sum += udata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + usum (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c new file mode 100644 index 0000000..194b260 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */ +/* { dg-add-options arm_neon } */ + +/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + +#include <stdlib.h> + +typedef unsigned short UTYPE1; +typedef unsigned int UTYPE2; + +#define N 128 +UTYPE1 udata[N]; + +volatile int y = 0; + +__attribute__ ((noinline)) int +usum () +{ + int i; + UTYPE2 sum = 0; + UTYPE2 check_sum = 0; + + for (i = 0; i < N; i++) + { + udata[i] = i*2; + check_sum += udata[i]; + /* Avoid vectorization. */ + if (y) + abort (); + } + + /* widening sum: sum shorts into int. 
*/ + for (i = 0; i < N; i++) + { + sum += udata[i]; + } + + /* check results: */ + if (sum != check_sum) + abort (); + + return 0; +} + +int +main (void) +{ + usum (); + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 6b957de..eaccb83 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { } else { set et_vect_widen_sum_hi_to_si_pattern_saved 0 if { [istarget powerpc*-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) || [istarget ia64-*-*] } { set et_vect_widen_sum_hi_to_si_pattern_saved 1 } @@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } { } else { set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack] if { [istarget powerpc*-*-*] - || [istarget ia64-*-*] } { + || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) + || [istarget ia64-*-*] } { set et_vect_widen_sum_hi_to_si_saved 1 } } @@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } { } else { set et_vect_widen_sum_qi_to_hi_saved 0 if { [check_effective_target_vect_unpack] - || [check_effective_target_arm_neon_ok] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) || [istarget ia64-*-*] } { set et_vect_widen_sum_qi_to_hi_saved 1 }