[PATCH] Fix *vector_shift_pattern (PR tree-optimization/70354)
From: Jakub Jelinek @ 2016-03-22 22:12 UTC
To: Richard Biener; +Cc: gcc-patches
Hi!
As the testcase shows, the C/C++ FEs narrow the shift counters from whatever
type they had originally to unsigned int (previously signed int).
Then the vect-patterns code, to be able to use vector-by-vector shifts,
attempts to narrow or widen them again to the right type. If there is
already a cast from the right precision, it just uses the rhs1 of that cast;
otherwise it adds a cast to the pattern, which performs the needed widening
or narrowing.
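(For illustration only, with hypothetical SSA names: by the time the
vectorizer runs, a long long counter has been narrowed to GIMPLE along
the lines of
  _1 = (unsigned int) b_val_2;
  _3 = a_val_4 << _1;
and it is the rhs1 of that cast, b_val_2, that the pattern code reuses.)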
Unfortunately, information is lost during optimization; we no longer know
whether the original was, say:
long a[64], b[64];
void foo (void)
{
for (int i = 0; i < 64; i++)
a[i] <<= b[i]; // Here we don't need any masking; it would be UB
// if b[i] isn't in range
}
void bar (void)
{
for (int i = 0; i < 64; i++)
a[i] <<= (unsigned) b[i]; // But here we can't just use b[i] as the
// shift count, because the upper bits are to be masked off.
}
void baz (void)
{
for (int i = 0; i < 64; i++)
a[i] <<= b[i] - 0x7200000000ULL; // And here the optimizers will likely
// optimize away the subtraction, because there
// is an implicit cast to (unsigned int). We need
// to mask instead of using b[i] directly.
}
But not narrowing, say, long long shift counters to unsigned int would
penalize other code by computing unneeded operations. So I'm afraid we
want the following fix, which I've bootstrapped/regtested on x86_64-linux
and i686-linux. For short/char shifts we don't need this, of course.
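For concreteness, the new pattern statement is equivalent to this scalar
sketch (illustration only; the names just mirror the ones in the patch,
with oprnd1 narrowed to 32-bit precision and rhs1 of the wider 64-bit type):

  /* Scalar model of the emitted masking: build_low_bits_mask for the
     narrower precision, then BIT_AND_EXPR with the wide counter.  */
  static unsigned long long
  mask_shift_count (unsigned long long rhs1)
  {
    unsigned long long mask = (1ULL << 32) - 1;
    return rhs1 & mask;
  }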
Ok for trunk?
2016-03-22 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/70354
* tree-vect-patterns.c (vect_recog_vector_vector_shift_pattern): If
oprnd0 is wider than oprnd1 and there is a cast from the wider
type to oprnd1, mask it with the mask of the narrower type.
* gcc.dg/vect/pr70354-1.c: New test.
* gcc.dg/vect/pr70354-2.c: New test.
* gcc.target/i386/avx2-pr70354-1.c: New test.
* gcc.target/i386/avx2-pr70354-2.c: New test.
--- gcc/tree-vect-patterns.c.jj 2016-03-04 15:42:12.000000000 +0100
+++ gcc/tree-vect-patterns.c 2016-03-22 15:28:24.403579426 +0100
@@ -2097,7 +2097,20 @@ vect_recog_vector_vector_shift_pattern (
if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
&& TYPE_PRECISION (TREE_TYPE (rhs1))
== TYPE_PRECISION (TREE_TYPE (oprnd0)))
- def = rhs1;
+ {
+ if (TYPE_PRECISION (TREE_TYPE (oprnd1))
+ >= TYPE_PRECISION (TREE_TYPE (rhs1)))
+ def = rhs1;
+ else
+ {
+ tree mask
+ = build_low_bits_mask (TREE_TYPE (rhs1),
+ TYPE_PRECISION (TREE_TYPE (oprnd1)));
+ def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
+ def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
+ new_pattern_def_seq (stmt_vinfo, def_stmt);
+ }
+ }
}
if (def == NULL_TREE)
--- gcc/testsuite/gcc.dg/vect/pr70354-1.c.jj 2016-03-22 15:36:42.210847707 +0100
+++ gcc/testsuite/gcc.dg/vect/pr70354-1.c 2016-03-22 15:47:45.448878909 +0100
@@ -0,0 +1,50 @@
+/* PR tree-optimization/70354 */
+/* { dg-do run } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+long long int b[64], c[64], g[64];
+unsigned long long int a[64], d[64], e[64], f[64], h[64];
+
+__attribute__ ((noinline, noclone)) void
+foo (void)
+{
+ int i;
+ for (i = 0; i < 64; i++)
+ {
+ d[i] = h[i] << (((((unsigned long long int) b[i] * e[i])
+ << (-a[i] - 3752448776177690134ULL))
+ - 8214565720323784703ULL) - 1ULL);
+ e[i] = (_Bool) (f[i] + (unsigned long long int) g[i]);
+ g[i] = c[i];
+ }
+}
+
+int
+main ()
+{
+ int i;
+#ifndef main
+ check_vect ();
+#endif
+ if (__CHAR_BIT__ != 8 || sizeof (long long int) != 8)
+ return 0;
+ for (i = 0; i < 64; ++i)
+ {
+ a[i] = 14694295297531861425ULL;
+ b[i] = -1725558902283030715LL;
+ c[i] = 4402992416302558097LL;
+ e[i] = 6297173129107286501ULL;
+ f[i] = 13865724171235650855ULL;
+ g[i] = 982871027473857427LL;
+ h[i] = 8193845517487445944ULL;
+ }
+ foo ();
+ for (i = 0; i < 64; i++)
+ if (d[i] != 8193845517487445944ULL || e[i] != 1
+ || g[i] != 4402992416302558097ULL)
+ abort ();
+ return 0;
+}
--- gcc/testsuite/gcc.dg/vect/pr70354-2.c.jj 2016-03-22 15:36:45.527802852 +0100
+++ gcc/testsuite/gcc.dg/vect/pr70354-2.c 2016-03-22 16:07:09.397164461 +0100
@@ -0,0 +1,37 @@
+/* PR tree-optimization/70354 */
+/* { dg-do run } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+unsigned long long a[64], b[64];
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+ int i;
+ for (i = 0; i < 64; i++)
+ a[i] <<= (b[i] - 0x1200000000ULL);
+}
+
+int
+main ()
+{
+ int i;
+#ifndef main
+ check_vect ();
+#endif
+ if (__CHAR_BIT__ != 8 || sizeof (long long int) != 8)
+ return 0;
+ for (i = 0; i < 64; i++)
+ {
+ a[i] = 0x1234ULL;
+ b[i] = 0x1200000000ULL + (i % 54);
+ }
+ foo ();
+ for (i = 0; i < 64; i++)
+ if (a[i] != (0x1234ULL << (i % 54)))
+ abort ();
+ return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c.jj 2016-03-22 15:44:42.228356553 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c 2016-03-22 15:47:26.738131930 +0100
@@ -0,0 +1,16 @@
+/* PR tree-optimization/70354 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/pr70354-1.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c.jj 2016-03-22 15:44:50.283247629 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c 2016-03-22 15:44:55.856172268 +0100
@@ -0,0 +1,16 @@
+/* PR tree-optimization/70354 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/pr70354-2.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
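If it helps review, the new tests can be run on their own with something
like the following (assuming a built tree; the exact RUNTESTFLAGS spelling
may differ per setup):

  make check-gcc RUNTESTFLAGS='vect.exp=pr70354*'
  make check-gcc RUNTESTFLAGS='i386.exp=avx2-pr70354*'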
Jakub
Re: [PATCH] Fix *vector_shift_pattern (PR tree-optimization/70354)
From: Richard Biener @ 2016-03-23 9:14 UTC
To: Jakub Jelinek; +Cc: gcc-patches
On Tue, 22 Mar 2016, Jakub Jelinek wrote:
> Hi!
>
> As the testcase shows, the C/C++ FEs narrow the shift counters from whatever
> type they had originally to unsigned int (previously signed int).
> Then the vect-patterns code, to be able to use vector-by-vector shifts,
> attempts to narrow or widen them again to the right type. If there is
> already a cast from the right precision, it just uses the rhs1 of that cast;
> otherwise it adds a cast to the pattern, which performs the needed widening
> or narrowing.
>
> Unfortunately, information is lost during optimization; we no longer know
> whether the original was, say:
> long a[64], b[64];
> void foo (void)
> {
> for (int i = 0; i < 64; i++)
> a[i] <<= b[i]; // Here we don't need any masking; it would be UB
> // if b[i] isn't in range
> }
> void bar (void)
> {
> for (int i = 0; i < 64; i++)
> a[i] <<= (unsigned) b[i]; // But here we can't just use b[i] as the
> // shift count, because the upper bits are to be masked off.
> }
True. I suppose that, since signed ops with well-defined overflow would
sometimes come in useful here, we'd like to have a conversion with
undefined overflow behavior to capture this fact in the IL...
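(A hypothetical sketch of such a conversion, with made-up notation:
  _1 = (unsigned int) b_5;  /* value asserted to fit; excess bits undefined */
would let the vectorizer omit the masking in the foo case above.)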
> void baz (void)
> {
> for (int i = 0; i < 64; i++)
> a[i] <<= b[i] - 0x7200000000ULL; // And here the optimizers will likely
> // optimize away the subtraction, because there
> // is an implicit cast to (unsigned int). We need
> // to mask instead of using b[i] directly.
> }
> But not narrowing, say, long long shift counters to unsigned int would
> penalize other code by computing unneeded operations. So I'm afraid we
> want the following fix, which I've bootstrapped/regtested on x86_64-linux
> and i686-linux. For short/char shifts we don't need this, of course.
>
> Ok for trunk?
Ok.
Thanks,
Richard.
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)