public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][2/2] Fix PR81502
@ 2017-07-27 13:51 Richard Biener
  2017-07-27 15:59 ` Andrew Pinski
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2017-07-27 13:51 UTC (permalink / raw)
  To: gcc-patches


I am testing the following additional pattern for match.pd to fix
PR81502, resulting in the desired optimization to

bar:
.LFB526:
        .cfi_startproc
        movl    %edi, %eax
        ret

The pattern optimizes a BIT_FIELD_REF of a BIT_INSERT_EXPR by
extracting either from the destination or from the inserted value.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-07-27  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81502
	* match.pd: Add pattern combining BIT_INSERT_EXPR with
	BIT_FIELD_REF.

	* gcc.target/i386/pr81502.c: New testcase.

Index: gcc/match.pd
===================================================================
*** gcc/match.pd	(revision 250620)
--- gcc/match.pd	(working copy)
*************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
*** 4178,4180 ****
--- 4178,4195 ----
  	 { CONSTRUCTOR_ELT (ctor, idx / k)->value; })
  	(BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
  		       @1 { bitsize_int ((idx % k) * width); })))))))))
+ 
+ /* Simplify a bit extraction from a bit insertion for the cases with
+    the inserted element fully covering the extraction or the insertion
+    not touching the extraction.  */
+ (simplify
+  (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
+  (switch
+   (if (wi::leu_p (@ipos, @rpos)
+        && wi::leu_p (wi::add (@rpos, @rsize),
+                      wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
+    (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
+                                                 wi::sub (@rpos, @ipos)); }))
+   (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
+        || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
+    (BIT_FIELD_REF @0 @rsize @rpos))))
Index: gcc/testsuite/gcc.target/i386/pr81502.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr81502.c	(nonexistent)
--- gcc/testsuite/gcc.target/i386/pr81502.c	(working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do compile { target lp64 } } */
+ /* { dg-options "-O2 -msse2" } */
+ 
+ #include <emmintrin.h>
+ 
+ #define SIZE (sizeof (void *))
+ 
+ static int foo(unsigned char (*foo)[SIZE])
+ {
+   __m128i acc = _mm_set_epi32(0, 0, 0, 0);
+   size_t i = 0;
+   for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
+       __m128i word;
+       __builtin_memcpy(&word, foo + i, sizeof(__m128i));
+       acc = _mm_add_epi32(word, acc);
+   }
+   if (i != SIZE) {
+       __m128i word = _mm_set_epi32(0, 0, 0, 0);
+       __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
+       acc = _mm_add_epi32(word, acc);
+   }
+   int res;
+   __builtin_memcpy(&res, &acc, sizeof(res));
+   return res;
+ }
+ 
+ int bar(void *ptr)
+ {
+   unsigned char buf[SIZE];
+   __builtin_memcpy(buf, &ptr, SIZE);
+   return foo((unsigned char(*)[SIZE])buf);
+ }
+ 
+ /* { dg-final { scan-assembler-times "mov" 1 } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH][2/2] Fix PR81502
  2017-07-27 13:51 [PATCH][2/2] Fix PR81502 Richard Biener
@ 2017-07-27 15:59 ` Andrew Pinski
  2017-07-28 11:26   ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Andrew Pinski @ 2017-07-27 15:59 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguenther@suse.de> wrote:
>
> I am testing the following additional pattern for match.pd to fix
> PR81502 resulting in the desired optimization to
>
> bar:
> .LFB526:
>         .cfi_startproc
>         movl    %edi, %eax
>         ret
>
> the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by
> either extracting from the destination or the inserted value.

Note that this optimization pattern was on my list to implement for
bit-field optimizations after lowering.

Thanks,
Andrew Pinski

>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2017-07-27  Richard Biener  <rguenther@suse.de>
>
>         PR tree-optimization/81502
>         * match.pd: Add pattern combining BIT_INSERT_EXPR with
>         BIT_FIELD_REF.
>
>         * gcc.target/i386/pr81502.c: New testcase.
>
> Index: gcc/match.pd
> ===================================================================
> *** gcc/match.pd        (revision 250620)
> --- gcc/match.pd        (working copy)
> *************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> *** 4178,4180 ****
> --- 4178,4195 ----
>          { CONSTRUCTOR_ELT (ctor, idx / k)->value; })
>         (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
>                        @1 { bitsize_int ((idx % k) * width); })))))))))
> +
> + /* Simplify a bit extraction from a bit insertion for the cases with
> +    the inserted element fully covering the extraction or the insertion
> +    not touching the extraction.  */
> + (simplify
> +  (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
> +  (switch
> +   (if (wi::leu_p (@ipos, @rpos)
> +        && wi::leu_p (wi::add (@rpos, @rsize),
> +                      wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> +    (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
> +                                                 wi::sub (@rpos, @ipos)); }))
> +   (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
> +        || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> +    (BIT_FIELD_REF @0 @rsize @rpos))))
> Index: gcc/testsuite/gcc.target/i386/pr81502.c
> ===================================================================
> *** gcc/testsuite/gcc.target/i386/pr81502.c     (nonexistent)
> --- gcc/testsuite/gcc.target/i386/pr81502.c     (working copy)
> ***************
> *** 0 ****
> --- 1,34 ----
> + /* { dg-do compile { target lp64 } } */
> + /* { dg-options "-O2 -msse2" } */
> +
> + #include <emmintrin.h>
> +
> + #define SIZE (sizeof (void *))
> +
> + static int foo(unsigned char (*foo)[SIZE])
> + {
> +   __m128i acc = _mm_set_epi32(0, 0, 0, 0);
> +   size_t i = 0;
> +   for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
> +       __m128i word;
> +       __builtin_memcpy(&word, foo + i, sizeof(__m128i));
> +       acc = _mm_add_epi32(word, acc);
> +   }
> +   if (i != SIZE) {
> +       __m128i word = _mm_set_epi32(0, 0, 0, 0);
> +       __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
> +       acc = _mm_add_epi32(word, acc);
> +   }
> +   int res;
> +   __builtin_memcpy(&res, &acc, sizeof(res));
> +   return res;
> + }
> +
> + int bar(void *ptr)
> + {
> +   unsigned char buf[SIZE];
> +   __builtin_memcpy(buf, &ptr, SIZE);
> +   return foo((unsigned char(*)[SIZE])buf);
> + }
> +
> + /* { dg-final { scan-assembler-times "mov" 1 } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH][2/2] Fix PR81502
  2017-07-27 15:59 ` Andrew Pinski
@ 2017-07-28 11:26   ` Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2017-07-28 11:26 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: GCC Patches

On Thu, 27 Jul 2017, Andrew Pinski wrote:

> On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguenther@suse.de> wrote:
> >
> > I am testing the following additional pattern for match.pd to fix
> > PR81502 resulting in the desired optimization to
> >
> > bar:
> > .LFB526:
> >         .cfi_startproc
> >         movl    %edi, %eax
> >         ret
> >
> > the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by
> > either extracting from the destination or the inserted value.
> 
> Note this optimization pattern was on my list to implement for
> bit-field optimizations after lowering.

I had to make some adjustments and ended up enforcing bitsizetype operands
for BIT_INSERT_EXPR and BIT_FIELD_REF.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-07-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81502
	* match.pd: Add pattern combining BIT_INSERT_EXPR with
	BIT_FIELD_REF.
	* tree-cfg.c (verify_expr): Verify types of BIT_FIELD_REF
	size/pos operands.
	(verify_gimple_assign_ternary): Likewise for BIT_INSERT_EXPR pos.
	* gimple-fold.c (maybe_canonicalize_mem_ref_addr): Use bitsizetype
	for BIT_FIELD_REF args.
	* fold-const.c (make_bit_field_ref): Likewise.
	* tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise.

	* gcc.target/i386/pr81502.c: New testcase.

Index: gcc/testsuite/gcc.target/i386/pr81502.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr81502.c	(nonexistent)
--- gcc/testsuite/gcc.target/i386/pr81502.c	(working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do compile { target lp64 } } */
+ /* { dg-options "-O2 -msse2" } */
+ 
+ #include <emmintrin.h>
+ 
+ #define SIZE (sizeof (void *))
+ 
+ static int foo(unsigned char (*foo)[SIZE])
+ {
+   __m128i acc = _mm_set_epi32(0, 0, 0, 0);
+   size_t i = 0;
+   for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
+       __m128i word;
+       __builtin_memcpy(&word, foo + i, sizeof(__m128i));
+       acc = _mm_add_epi32(word, acc);
+   }
+   if (i != SIZE) {
+       __m128i word = _mm_set_epi32(0, 0, 0, 0);
+       __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
+       acc = _mm_add_epi32(word, acc);
+   }
+   int res;
+   __builtin_memcpy(&res, &acc, sizeof(res));
+   return res;
+ }
+ 
+ int bar(void *ptr)
+ {
+   unsigned char buf[SIZE];
+   __builtin_memcpy(buf, &ptr, SIZE);
+   return foo((unsigned char(*)[SIZE])buf);
+ }
+ 
+ /* { dg-final { scan-assembler-times "mov" 1 } } */
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 250625)
+++ gcc/match.pd	(working copy)
@@ -4178,3 +4178,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 { CONSTRUCTOR_ELT (ctor, idx / k)->value; })
 	(BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
 		       @1 { bitsize_int ((idx % k) * width); })))))))))
+
+/* Simplify a bit extraction from a bit insertion for the cases with
+   the inserted element fully covering the extraction or the insertion
+   not touching the extraction.  */
+(simplify
+ (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
+ (with
+  {
+    unsigned HOST_WIDE_INT isize;
+    if (INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+      isize = TYPE_PRECISION (TREE_TYPE (@1));
+    else
+      isize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (@1)));
+  }
+  (switch
+   (if (wi::leu_p (@ipos, @rpos)
+        && wi::leu_p (wi::add (@rpos, @rsize), wi::add (@ipos, isize)))
+    (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
+                                                 wi::sub (@rpos, @ipos)); }))
+   (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
+        || wi::geu_p (@rpos, wi::add (@ipos, isize)))
+    (BIT_FIELD_REF @0 @rsize @rpos)))))
Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c	(revision 250620)
+++ gcc/gimple-fold.c	(working copy)
@@ -4245,7 +4245,7 @@ maybe_canonicalize_mem_ref_addr (tree *t
 				       TREE_TYPE (*t),
 				       TREE_OPERAND (TREE_OPERAND (*t, 0), 0),
 				       TYPE_SIZE (TREE_TYPE (*t)),
-				       wide_int_to_tree (sizetype, idx));
+				       wide_int_to_tree (bitsizetype, idx));
 		      res = true;
 		    }
 		}
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 250620)
+++ gcc/tree-cfg.c	(working copy)
@@ -3053,7 +3053,9 @@ verify_expr (tree *tp, int *walk_subtree
 	  tree t1 = TREE_OPERAND (t, 1);
 	  tree t2 = TREE_OPERAND (t, 2);
 	  if (!tree_fits_uhwi_p (t1)
-	      || !tree_fits_uhwi_p (t2))
+	      || !tree_fits_uhwi_p (t2)
+	      || !types_compatible_p (bitsizetype, TREE_TYPE (t1))
+	      || !types_compatible_p (bitsizetype, TREE_TYPE (t2)))
 	    {
 	      error ("invalid position or size operand to BIT_FIELD_REF");
 	      return t;
@@ -4247,6 +4249,7 @@ verify_gimple_assign_ternary (gassign *s
 	  return true;
 	}
       if (! tree_fits_uhwi_p (rhs3)
+	  || ! types_compatible_p (bitsizetype, TREE_TYPE (rhs3))
 	  || ! tree_fits_uhwi_p (TYPE_SIZE (rhs2_type)))
 	{
 	  error ("invalid position or size in BIT_INSERT_EXPR");
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 250625)
+++ gcc/fold-const.c	(working copy)
@@ -3936,7 +3936,7 @@ make_bit_field_ref (location_t loc, tree
     bftype = build_nonstandard_integer_type (bitsize, 0);
 
   result = build3_loc (loc, BIT_FIELD_REF, bftype, inner,
-		       size_int (bitsize), bitsize_int (bitpos));
+		       bitsize_int (bitsize), bitsize_int (bitpos));
   REF_REVERSE_STORAGE_ORDER (result) = reversep;
 
   if (bftype != type)
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 250625)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -3531,7 +3531,7 @@ vectorizable_simd_clone_call (gimple *st
 		      arginfo[i].op = vec_oprnd0;
 		      vec_oprnd0
 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
-				  size_int (prec),
+				  bitsize_int (prec),
 				  bitsize_int ((m & (k - 1)) * prec));
 		      new_stmt
 			= gimple_build_assign (make_ssa_name (atype),
@@ -3692,7 +3692,7 @@ vectorizable_simd_clone_call (gimple *st
 		    }
 		  else
 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
-				size_int (prec), bitsize_int (l * prec));
+				bitsize_int (prec), bitsize_int (l * prec));
 		  new_stmt
 		    = gimple_build_assign (make_ssa_name (vectype), t);
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-07-28 11:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-27 13:51 [PATCH][2/2] Fix PR81502 Richard Biener
2017-07-27 15:59 ` Andrew Pinski
2017-07-28 11:26   ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).