public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-1100] Fold truncations of left shifts in match.pd
@ 2022-06-15  7:32 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2022-06-15  7:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:acb1e6f43dc2bbedd1248ea61c7ab537a11fe59b

commit r13-1100-gacb1e6f43dc2bbedd1248ea61c7ab537a11fe59b
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Wed Jun 15 09:31:13 2022 +0200

    Fold truncations of left shifts in match.pd
    
    Whilst investigating PR 55278, I noticed that the tree-ssa optimizers
    aren't eliminating the promotions of shifts to "int" as inserted by the
    c-family front-ends, instead leaving this simplification to the RTL
    optimizers.  This patch allows match.pd to do this itself earlier,
    narrowing (T)(X << C) to (T)X << C when the constant C is known to be
    valid for the (narrower) type T.
    
    Hence for this simple test case:
    short foo(short x) { return x << 5; }
    
    the .optimized dump currently looks like:
    
    short int foo (short int x)
    {
      int _1;
      int _2;
      short int _4;
    
      <bb 2> [local count: 1073741824]:
      _1 = (int) x_3(D);
      _2 = _1 << 5;
      _4 = (short int) _2;
      return _4;
    }
    
    but with this patch, now becomes:
    
    short int foo (short int x)
    {
      short int _2;
    
      <bb 2> [local count: 1073741824]:
      _2 = x_1(D) << 5;
      return _2;
    }
    
    This is always reasonable as RTL expansion knows how to use
    widening optabs if it makes sense at the RTL level to perform
    this shift in a wider mode.
    
    Of course, there's often a catch.  The above simplification not only
    reduces the number of statements in gimple, but also allows further
    optimizations, for example the recognition of rotate idioms
    and bswap16.  Alas, optimizing things earlier than anticipated
    requires several testsuite changes [though all these tests have
    been confirmed to generate identical assembly code on x86_64].
    The only significant change is that the vectorization pass wouldn't
    previously lower rotations of signed integer types.  Hence this
    patch includes a refinement to tree-vect-patterns to allow signed
    types, by using the equivalent unsigned shifts.
    
    2022-06-15  Roger Sayle  <roger@nextmovesoftware.com>
                Richard Biener  <rguenther@suse.de>
    
    gcc/ChangeLog
            * match.pd (convert (lshift @1 INTEGER_CST@2)): Narrow integer
            left shifts by a constant when the result is truncated, and the
            shift constant is well-defined.
            * tree-vect-patterns.cc (vect_recog_rotate_pattern): Add
            support for rotations of signed integer types, by lowering
            using unsigned vector shifts.
    
    gcc/testsuite/ChangeLog
            * gcc.dg/fold-convlshift-4.c: New test case.
            * gcc.dg/optimize-bswaphi-1.c: Update found bswap count.
            * gcc.dg/tree-ssa/pr61839_3.c: Shift is now optimized before VRP.
            * gcc.dg/vect/vect-over-widen-1-big-array.c: Remove obsolete tests.
            * gcc.dg/vect/vect-over-widen-1.c: Likewise.
            * gcc.dg/vect/vect-over-widen-3-big-array.c: Likewise.
            * gcc.dg/vect/vect-over-widen-3.c: Likewise.
            * gcc.dg/vect/vect-over-widen-4-big-array.c: Likewise.
            * gcc.dg/vect/vect-over-widen-4.c: Likewise.

Diff:
---
 gcc/match.pd                                       | 19 ++++-----
 gcc/testsuite/gcc.dg/fold-convlshift-4.c           |  9 +++++
 gcc/testsuite/gcc.dg/optimize-bswaphi-1.c          |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c          |  4 +-
 .../gcc.dg/vect/vect-over-widen-1-big-array.c      |  2 -
 gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c      |  2 -
 .../gcc.dg/vect/vect-over-widen-3-big-array.c      |  2 -
 gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c      |  2 -
 .../gcc.dg/vect/vect-over-widen-4-big-array.c      |  2 -
 gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c      |  2 -
 gcc/tree-vect-patterns.cc                          | 47 +++++++++++++---------
 11 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 776c9c6489a..d4058d61979 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3621,17 +3621,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (integer_zerop (@2) || integer_all_onesp (@2))
      (cmp @0 @2)))))
 
-/* Both signed and unsigned lshift produce the same result, so use
-   the form that minimizes the number of conversions.  Postpone this
-   transformation until after shifts by zero have been folded.  */
+/* Narrow a lshift by constant.  */
 (simplify
- (convert (lshift:s@0 (convert:s@1 @2) INTEGER_CST@3))
+ (convert (lshift:s@0 @1 INTEGER_CST@2))
  (if (INTEGRAL_TYPE_P (type)
-      && tree_nop_conversion_p (type, TREE_TYPE (@0))
-      && INTEGRAL_TYPE_P (TREE_TYPE (@2))
-      && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type)
-      && !integer_zerop (@3))
-  (lshift (convert @2) @3)))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && !integer_zerop (@2)
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)))
+  (if (TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+       || wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (type)))
+   (lshift (convert @1) @2)
+   (if (wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (TREE_TYPE (@0))))
+    { build_zero_cst (type); }))))
 
 /* Simplifications of conversions.  */
 
diff --git a/gcc/testsuite/gcc.dg/fold-convlshift-4.c b/gcc/testsuite/gcc.dg/fold-convlshift-4.c
new file mode 100644
index 00000000000..001627fc185
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-convlshift-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+short foo(short x)
+{
+  return x << 5;
+}
+
+/* { dg-final { scan-tree-dump-not "\\(int\\)" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(short int\\)" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
index d045da9ea80..a5d8bfd5838 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
@@ -68,4 +68,4 @@ get_unaligned_16_be (unsigned char *p)
 
 
 /* { dg-final { scan-tree-dump-times "16 bit load in target endianness found at" 4 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 5 "bswap" } } */
+/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 4 "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c
index bc2126fce4e..38cf792aca2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/61839.  */
 /* { dg-do run } */
-/* { dg-options "-O2 -fdump-tree-vrp -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
 
 __attribute__ ((noinline))
 int foo (int a, unsigned b)
@@ -21,6 +21,4 @@ int main ()
   foo (-1, b);
 }
 
-/* Scan for c [12, 13] << 8 in function foo.  */
-/* { dg-final { scan-tree-dump-times "3072 : 3328" 1  "vrp1" } } */
 /* { dg-final { scan-tree-dump-times "3072" 0  "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1-big-array.c
index 9e5f464a88f..9a5141ee6ec 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1-big-array.c
@@ -58,9 +58,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
index c2d07974dfb..f2d284ca9be 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
@@ -62,9 +62,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
index 37da7c917e4..6f89aacbebf 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
@@ -59,9 +59,7 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
index 41384807a45..a1e1182c606 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
@@ -57,9 +57,7 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4-big-array.c
index 514337c579c..03a6e6795ec 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4-big-array.c
@@ -62,9 +62,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
index 3d536d5ddcc..0ef377f1f58 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
@@ -66,9 +66,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 0fad4dbd094..8f624863971 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -2614,8 +2614,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
 	  || TYPE_PRECISION (type) <= 16
 	  || TREE_CODE (oprnd0) != SSA_NAME
-	  || BITS_PER_UNIT != 8
-	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
+	  || BITS_PER_UNIT != 8)
 	return NULL;
 
       stmt_vec_info def_stmt_info;
@@ -2688,8 +2687,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 
   if (TREE_CODE (oprnd0) != SSA_NAME
       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
-      || !INTEGRAL_TYPE_P (type)
-      || !TYPE_UNSIGNED (type))
+      || !INTEGRAL_TYPE_P (type))
     return NULL;
 
   stmt_vec_info def_stmt_info;
@@ -2745,31 +2743,36 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 	goto use_rotate;
     }
 
+  tree utype = unsigned_type_for (type);
+  tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
+  if (!uvectype)
+    return NULL;
+
   /* If vector/vector or vector/scalar shifts aren't supported by the target,
      don't do anything here either.  */
-  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
-  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
+  optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
+  optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
   if (!optab1
-      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+      || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
       || !optab2
-      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+      || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
     {
       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
 	return NULL;
-      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
-      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
+      optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
+      optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
       if (!optab1
-	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+	  || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
 	  || !optab2
-	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	  || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
 	return NULL;
     }
 
   *type_out = vectype;
 
-  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
+  if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
     {
-      def = vect_recog_temp_ssa_var (type, NULL);
+      def = vect_recog_temp_ssa_var (utype, NULL);
       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
       oprnd0 = def;
@@ -2779,7 +2782,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
 
   def = NULL_TREE;
-  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
+  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
     def = oprnd1;
   else if (def_stmt && gimple_assign_cast_p (def_stmt))
@@ -2793,7 +2796,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 
   if (def == NULL_TREE)
     {
-      def = vect_recog_temp_ssa_var (type, NULL);
+      def = vect_recog_temp_ssa_var (utype, NULL);
       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
     }
@@ -2839,13 +2842,13 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
     }
 
-  var1 = vect_recog_temp_ssa_var (type, NULL);
+  var1 = vect_recog_temp_ssa_var (utype, NULL);
   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
 					? LSHIFT_EXPR : RSHIFT_EXPR,
 				  oprnd0, def);
   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
-  var2 = vect_recog_temp_ssa_var (type, NULL);
+  var2 = vect_recog_temp_ssa_var (utype, NULL);
   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
 					? RSHIFT_EXPR : LSHIFT_EXPR,
 				  oprnd0, def2);
@@ -2855,9 +2858,15 @@ vect_recog_rotate_pattern (vec_info *vinfo,
   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
 
   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
-  var = vect_recog_temp_ssa_var (type, NULL);
+  var = vect_recog_temp_ssa_var (utype, NULL);
   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
 
+  if (!useless_type_conversion_p (type, utype))
+    {
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+      tree result = vect_recog_temp_ssa_var (type, NULL);
+      pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
+    }
   return pattern_stmt;
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-15  7:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-15  7:32 [gcc r13-1100] Fold truncations of left shifts in match.pd Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).