* [2/2] PR88598: Optimise reduc (bit_and)
@ 2019-01-04 11:49 Richard Sandiford
2019-01-07 11:33 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Richard Sandiford @ 2019-01-04 11:49 UTC (permalink / raw)
To: gcc-patches
This patch folds certain reductions of X & CST to X[I] & CST[I] if I is
the only nonzero element of CST. This includes the motivating case in
which CST[I] is -1.
We could do the same for REDUC_MAX on unsigned types, but I wasn't sure
that that special case was worth it.
Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?
Obvious follow-ons include handling multiplications of
single_nonzero_element constants, multiplications of uniform constants,
and additions of any constant, but this is enough to fix the PR.
Richard
2019-01-04 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/88598
* tree.h (single_nonzero_element): Declare.
* tree.c (single_nonzero_element): New function.
* match.pd: Fold certain reductions of X & CST to X[I] & CST[I]
if I is the only nonzero element of CST.
gcc/testsuite/
PR tree-optimization/88598
* gcc.dg/vect/pr88598-1.c: New test.
* gcc.dg/vect/pr88598-2.c: Likewise.
* gcc.dg/vect/pr88598-3.c: Likewise.
* gcc.dg/vect/pr88598-4.c: Likewise.
* gcc.dg/vect/pr88598-5.c: Likewise.
* gcc.dg/vect/pr88598-6.c: Likewise.
Index: gcc/tree.h
===================================================================
--- gcc/tree.h 2019-01-04 11:40:33.141683783 +0000
+++ gcc/tree.h 2019-01-04 11:40:36.581654424 +0000
@@ -4522,6 +4522,8 @@ extern tree uniform_vector_p (const_tree
extern tree uniform_integer_cst_p (tree);
+extern int single_nonzero_element (const_tree);
+
/* Given a CONSTRUCTOR CTOR, return the element values as a vector. */
extern vec<tree, va_gc> *ctor_to_vec (tree);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c 2019-01-04 11:40:33.141683783 +0000
+++ gcc/tree.c 2019-01-04 11:40:36.577654458 +0000
@@ -11355,6 +11355,38 @@ uniform_integer_cst_p (tree t)
return NULL_TREE;
}
+/* If VECTOR_CST T has a single nonzero element, return the index of that
+ element, otherwise return -1. */
+
+int
+single_nonzero_element (const_tree t)
+{
+ unsigned HOST_WIDE_INT nelts;
+ unsigned int repeat_nelts;
+ if (VECTOR_CST_NELTS (t).is_constant (&nelts))
+ repeat_nelts = nelts;
+ else if (VECTOR_CST_NELTS_PER_PATTERN (t) == 2)
+ {
+ nelts = vector_cst_encoded_nelts (t);
+ repeat_nelts = VECTOR_CST_NPATTERNS (t);
+ }
+ else
+ return -1;
+
+ int res = -1;
+ for (unsigned int i = 0; i < nelts; ++i)
+ {
+ tree elt = vector_cst_elt (t, i);
+ if (!integer_zerop (elt) && !real_zerop (elt))
+ {
+ if (res >= 0 || i >= repeat_nelts)
+ return -1;
+ res = i;
+ }
+ }
+ return res;
+}
+
/* Build an empty statement at location LOC. */
tree
Index: gcc/match.pd
===================================================================
--- gcc/match.pd 2019-01-04 11:40:33.137683817 +0000
+++ gcc/match.pd 2019-01-04 11:40:36.573654492 +0000
@@ -5251,3 +5251,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ wide_int_to_tree (sizetype, off); })
{ swap_p ? @0 : @2; }))
{ rhs_tree; })))))))))
+
+/* Fold REDUC (@0 & @1) -> @0[I] & @1[I] if element I is the only nonzero
+ element of @1. */
+(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
+ (simplify (reduc (view_convert? (bit_and @0 VECTOR_CST@1)))
+ (with { int i = single_nonzero_element (@1); }
+ (if (i >= 0)
+ (with { tree elt = vector_cst_elt (@1, i);
+ tree elt_type = TREE_TYPE (elt);
+ unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type));
+ tree size = bitsize_int (elt_bits);
+ tree pos = bitsize_int (elt_bits * i); }
+ (view_convert
+ (bit_and:elt_type
+ (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
+ { elt; })))))))
Index: gcc/testsuite/gcc.dg/vect/pr88598-1.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-1.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+int a[N];
+
+int __attribute__ ((noipa))
+f1 (void)
+{
+ int b[N] = { 15, 0, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f2 (void)
+{
+ int b[N] = { 0, 31, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f3 (void)
+{
+ int b[N] = { 0, 0, 0, -1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != (a[0] & 15)
+ || f2 () != (a[1] & 31)
+ || f3 () != a[N - 1])
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* ??? We need more constant folding for this to work with fully-masked
+ loops. */
+/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
Index: gcc/testsuite/gcc.dg/vect/pr88598-2.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-2.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+int a[N];
+
+int __attribute__ ((noipa))
+f1 (void)
+{
+ int b[N] = { 1, 0, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f2 (void)
+{
+ int b[N] = { 0, 1, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f3 (void)
+{
+ int b[N] = { 0, 0, 0, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != a[0]
+ || f2 () != a[1]
+ || f3 () != a[N - 1])
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* ??? We need more constant folding for this to work with fully-masked
+ loops. */
+/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
Index: gcc/testsuite/gcc.dg/vect/pr88598-3.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-3.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+float a[N];
+
+float __attribute__ ((noipa))
+f1 (void)
+{
+ float b[N] = { 1, 0, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+float __attribute__ ((noipa))
+f2 (void)
+{
+ float b[N] = { 0, 1, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+float __attribute__ ((noipa))
+f3 (void)
+{
+ float b[N] = { 0, 0, 0, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != a[0]
+ || f2 () != a[1]
+ || f3 () != a[N - 1])
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* ??? We need more constant folding for this to work with fully-masked
+ loops. */
+/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
Index: gcc/testsuite/gcc.dg/vect/pr88598-4.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-4.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+int a[N];
+
+int __attribute__ ((noipa))
+f1 (void)
+{
+ int b[N] = { 0, 15, 15, 15 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f2 (void)
+{
+ int b[N] = { 0, 31, 0, 31 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f3 (void)
+{
+ int b[N] = { -1, -1, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] & b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != (a[1] & 15) + (a[2] & 15) + (a[3] & 15)
+ || f2 () != (a[1] & 31) + (a[3] & 31)
+ || f3 () != a[0] + a[1])
+ __builtin_abort ();
+
+ return 0;
+}
Index: gcc/testsuite/gcc.dg/vect/pr88598-5.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-5.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+int a[N];
+
+int __attribute__ ((noipa))
+f1 (void)
+{
+ int b[N] = { 0, 1, 1, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f2 (void)
+{
+ int b[N] = { 0, 1, 0, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int __attribute__ ((noipa))
+f3 (void)
+{
+ int b[N] = { 1, 1, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != a[1] + a[2] + a[3]
+ || f2 () != a[1] + a[3]
+ || f3 () != a[0] + a[1])
+ __builtin_abort ();
+
+ return 0;
+}
Index: gcc/testsuite/gcc.dg/vect/pr88598-6.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/vect/pr88598-6.c 2019-01-04 11:40:36.577654458 +0000
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N 4
+
+float a[N];
+
+float __attribute__ ((noipa))
+f1 (void)
+{
+ float b[N] = { 0, 1, 1, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+float __attribute__ ((noipa))
+f2 (void)
+{
+ float b[N] = { 0, 1, 0, 1 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+float __attribute__ ((noipa))
+f3 (void)
+{
+ float b[N] = { 1, 1, 0, 0 }, res = 0;
+ for (int i = 0; i < N; ++i)
+ res += a[i] * b[i];
+ return res;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = 0xe0 + i;
+
+ if (f1 () != a[1] + a[2] + a[3]
+ || f2 () != a[1] + a[3]
+ || f3 () != a[0] + a[1])
+ __builtin_abort ();
+
+ return 0;
+}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [2/2] PR88598: Optimise reduc (bit_and)
2019-01-04 11:49 [2/2] PR88598: Optimise reduc (bit_and) Richard Sandiford
@ 2019-01-07 11:33 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2019-01-07 11:33 UTC (permalink / raw)
To: GCC Patches, Richard Sandiford
On Fri, Jan 4, 2019 at 12:49 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> This patch folds certain reductions of X & CST to X[I] & CST[I] if I is
> the only nonzero element of CST. This includes the motivating case in
> which CST[I] is -1.
>
> We could do the same for REDUC_MAX on unsigned types, but I wasn't sure
> that that special case was worth it.
>
> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
> OK to install?
>
> Obvious follow-ons include handling multiplications of
> single_nonzero_element constants, multiplications of uniform constants,
> and additions of any constant, but this is enough to fix the PR.
OK.
Thanks,
Richard.
> Richard
>
>
> 2019-01-04 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/
> PR tree-optimization/88598
> * tree.h (single_nonzero_element): Declare.
> * tree.c (single_nonzero_element): New function.
> * match.pd: Fold certain reductions of X & CST to X[I] & CST[I]
> if I is the only nonzero element of CST.
>
> gcc/testsuite/
> PR tree-optimization/88598
> * gcc.dg/vect/pr88598-1.c: New test.
> * gcc.dg/vect/pr88598-2.c: Likewise.
> * gcc.dg/vect/pr88598-3.c: Likewise.
> * gcc.dg/vect/pr88598-4.c: Likewise.
> * gcc.dg/vect/pr88598-5.c: Likewise.
> * gcc.dg/vect/pr88598-6.c: Likewise.
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h 2019-01-04 11:40:33.141683783 +0000
> +++ gcc/tree.h 2019-01-04 11:40:36.581654424 +0000
> @@ -4522,6 +4522,8 @@ extern tree uniform_vector_p (const_tree
>
> extern tree uniform_integer_cst_p (tree);
>
> +extern int single_nonzero_element (const_tree);
> +
> /* Given a CONSTRUCTOR CTOR, return the element values as a vector. */
>
> extern vec<tree, va_gc> *ctor_to_vec (tree);
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c 2019-01-04 11:40:33.141683783 +0000
> +++ gcc/tree.c 2019-01-04 11:40:36.577654458 +0000
> @@ -11355,6 +11355,38 @@ uniform_integer_cst_p (tree t)
> return NULL_TREE;
> }
>
> +/* If VECTOR_CST T has a single nonzero element, return the index of that
> + element, otherwise return -1. */
> +
> +int
> +single_nonzero_element (const_tree t)
> +{
> + unsigned HOST_WIDE_INT nelts;
> + unsigned int repeat_nelts;
> + if (VECTOR_CST_NELTS (t).is_constant (&nelts))
> + repeat_nelts = nelts;
> + else if (VECTOR_CST_NELTS_PER_PATTERN (t) == 2)
> + {
> + nelts = vector_cst_encoded_nelts (t);
> + repeat_nelts = VECTOR_CST_NPATTERNS (t);
> + }
> + else
> + return -1;
> +
> + int res = -1;
> + for (unsigned int i = 0; i < nelts; ++i)
> + {
> + tree elt = vector_cst_elt (t, i);
> + if (!integer_zerop (elt) && !real_zerop (elt))
> + {
> + if (res >= 0 || i >= repeat_nelts)
> + return -1;
> + res = i;
> + }
> + }
> + return res;
> +}
> +
> /* Build an empty statement at location LOC. */
>
> tree
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd 2019-01-04 11:40:33.137683817 +0000
> +++ gcc/match.pd 2019-01-04 11:40:36.573654492 +0000
> @@ -5251,3 +5251,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> { wide_int_to_tree (sizetype, off); })
> { swap_p ? @0 : @2; }))
> { rhs_tree; })))))))))
> +
> +/* Fold REDUC (@0 & @1) -> @0[I] & @1[I] if element I is the only nonzero
> + element of @1. */
> +(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
> + (simplify (reduc (view_convert? (bit_and @0 VECTOR_CST@1)))
> + (with { int i = single_nonzero_element (@1); }
> + (if (i >= 0)
> + (with { tree elt = vector_cst_elt (@1, i);
> + tree elt_type = TREE_TYPE (elt);
> + unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type));
> + tree size = bitsize_int (elt_bits);
> + tree pos = bitsize_int (elt_bits * i); }
> + (view_convert
> + (bit_and:elt_type
> + (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
> + { elt; })))))))
> Index: gcc/testsuite/gcc.dg/vect/pr88598-1.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-1.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +int a[N];
> +
> +int __attribute__ ((noipa))
> +f1 (void)
> +{
> + int b[N] = { 15, 0, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f2 (void)
> +{
> + int b[N] = { 0, 31, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f3 (void)
> +{
> + int b[N] = { 0, 0, 0, -1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != (a[0] & 15)
> + || f2 () != (a[1] & 31)
> + || f3 () != a[N - 1])
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* ??? We need more constant folding for this to work with fully-masked
> + loops. */
> +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr88598-2.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-2.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +int a[N];
> +
> +int __attribute__ ((noipa))
> +f1 (void)
> +{
> + int b[N] = { 1, 0, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f2 (void)
> +{
> + int b[N] = { 0, 1, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f3 (void)
> +{
> + int b[N] = { 0, 0, 0, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != a[0]
> + || f2 () != a[1]
> + || f3 () != a[N - 1])
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* ??? We need more constant folding for this to work with fully-masked
> + loops. */
> +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr88598-3.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-3.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +float a[N];
> +
> +float __attribute__ ((noipa))
> +f1 (void)
> +{
> + float b[N] = { 1, 0, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +float __attribute__ ((noipa))
> +f2 (void)
> +{
> + float b[N] = { 0, 1, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +float __attribute__ ((noipa))
> +f3 (void)
> +{
> + float b[N] = { 0, 0, 0, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != a[0]
> + || f2 () != a[1]
> + || f3 () != a[N - 1])
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* ??? We need more constant folding for this to work with fully-masked
> + loops. */
> +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr88598-4.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-4.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,51 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +int a[N];
> +
> +int __attribute__ ((noipa))
> +f1 (void)
> +{
> + int b[N] = { 0, 15, 15, 15 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f2 (void)
> +{
> + int b[N] = { 0, 31, 0, 31 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f3 (void)
> +{
> + int b[N] = { -1, -1, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] & b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != (a[1] & 15) + (a[2] & 15) + (a[3] & 15)
> + || f2 () != (a[1] & 31) + (a[3] & 31)
> + || f3 () != a[0] + a[1])
> + __builtin_abort ();
> +
> + return 0;
> +}
> Index: gcc/testsuite/gcc.dg/vect/pr88598-5.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-5.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,51 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +int a[N];
> +
> +int __attribute__ ((noipa))
> +f1 (void)
> +{
> + int b[N] = { 0, 1, 1, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f2 (void)
> +{
> + int b[N] = { 0, 1, 0, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int __attribute__ ((noipa))
> +f3 (void)
> +{
> + int b[N] = { 1, 1, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != a[1] + a[2] + a[3]
> + || f2 () != a[1] + a[3]
> + || f3 () != a[0] + a[1])
> + __builtin_abort ();
> +
> + return 0;
> +}
> Index: gcc/testsuite/gcc.dg/vect/pr88598-6.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/vect/pr88598-6.c 2019-01-04 11:40:36.577654458 +0000
> @@ -0,0 +1,51 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
> +
> +#include "tree-vect.h"
> +
> +#define N 4
> +
> +float a[N];
> +
> +float __attribute__ ((noipa))
> +f1 (void)
> +{
> + float b[N] = { 0, 1, 1, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +float __attribute__ ((noipa))
> +f2 (void)
> +{
> + float b[N] = { 0, 1, 0, 1 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +float __attribute__ ((noipa))
> +f3 (void)
> +{
> + float b[N] = { 1, 1, 0, 0 }, res = 0;
> + for (int i = 0; i < N; ++i)
> + res += a[i] * b[i];
> + return res;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = 0xe0 + i;
> +
> + if (f1 () != a[1] + a[2] + a[3]
> + || f2 () != a[1] + a[3]
> + || f3 () != a[0] + a[1])
> + __builtin_abort ();
> +
> + return 0;
> +}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-01-07 11:33 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-04 11:49 [2/2] PR88598: Optimise reduc (bit_and) Richard Sandiford
2019-01-07 11:33 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).