* [PATCH][2/2] Fix PR81502
@ 2017-07-27 13:51 Richard Biener
2017-07-27 15:59 ` Andrew Pinski
0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2017-07-27 13:51 UTC (permalink / raw)
To: gcc-patches
I am testing the following additional pattern for match.pd to fix
PR81502, resulting in the desired optimization to
bar:
.LFB526:
.cfi_startproc
movl %edi, %eax
ret
The pattern optimizes a BIT_FIELD_REF of a BIT_INSERT_EXPR by
extracting either from the destination or from the inserted value.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2017-07-27 Richard Biener <rguenther@suse.de>
PR tree-optimization/81502
* match.pd: Add pattern combining BIT_INSERT_EXPR with
BIT_FIELD_REF.
* gcc.target/i386/pr81502.c: New testcase.
Index: gcc/match.pd
===================================================================
*** gcc/match.pd (revision 250620)
--- gcc/match.pd (working copy)
*************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
*** 4178,4180 ****
--- 4178,4195 ----
{ CONSTRUCTOR_ELT (ctor, idx / k)->value; })
(BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
@1 { bitsize_int ((idx % k) * width); })))))))))
+
+ /* Simplify a bit extraction from a bit insertion for the cases with
+ the inserted element fully covering the extraction or the insertion
+ not touching the extraction. */
+ (simplify
+ (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
+ (switch
+ (if (wi::leu_p (@ipos, @rpos)
+ && wi::leu_p (wi::add (@rpos, @rsize),
+ wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
+ (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
+ wi::sub (@rpos, @ipos)); }))
+ (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
+ || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
+ (BIT_FIELD_REF @0 @rsize @rpos))))
Index: gcc/testsuite/gcc.target/i386/pr81502.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr81502.c (nonexistent)
--- gcc/testsuite/gcc.target/i386/pr81502.c (working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do compile { target lp64 } } */
+ /* { dg-options "-O2 -msse2" } */
+
+ #include <emmintrin.h>
+
+ #define SIZE (sizeof (void *))
+
+ static int foo(unsigned char (*foo)[SIZE])
+ {
+ __m128i acc = _mm_set_epi32(0, 0, 0, 0);
+ size_t i = 0;
+ for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
+ __m128i word;
+ __builtin_memcpy(&word, foo + i, sizeof(__m128i));
+ acc = _mm_add_epi32(word, acc);
+ }
+ if (i != SIZE) {
+ __m128i word = _mm_set_epi32(0, 0, 0, 0);
+ __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
+ acc = _mm_add_epi32(word, acc);
+ }
+ int res;
+ __builtin_memcpy(&res, &acc, sizeof(res));
+ return res;
+ }
+
+ int bar(void *ptr)
+ {
+ unsigned char buf[SIZE];
+ __builtin_memcpy(buf, &ptr, SIZE);
+ return foo((unsigned char(*)[SIZE])buf);
+ }
+
+ /* { dg-final { scan-assembler-times "mov" 1 } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH][2/2] Fix PR81502
2017-07-27 13:51 [PATCH][2/2] Fix PR81502 Richard Biener
@ 2017-07-27 15:59 ` Andrew Pinski
2017-07-28 11:26 ` Richard Biener
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Pinski @ 2017-07-27 15:59 UTC (permalink / raw)
To: Richard Biener; +Cc: GCC Patches
On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguenther@suse.de> wrote:
>
> I am testing the following additional pattern for match.pd to fix
> PR81502 resulting in the desired optimization to
>
> bar:
> .LFB526:
> .cfi_startproc
> movl %edi, %eax
> ret
>
> the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by
> either extracting from the destination or the inserted value.
Note this optimization pattern was on my list to implement for
bit-field optimizations after lowering.
Thanks,
Andrew Pinski
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2017-07-27 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/81502
> * match.pd: Add pattern combining BIT_INSERT_EXPR with
> BIT_FIELD_REF.
>
> * gcc.target/i386/pr81502.c: New testcase.
>
> Index: gcc/match.pd
> ===================================================================
> *** gcc/match.pd (revision 250620)
> --- gcc/match.pd (working copy)
> *************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> *** 4178,4180 ****
> --- 4178,4195 ----
> { CONSTRUCTOR_ELT (ctor, idx / k)->value; })
> (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
> @1 { bitsize_int ((idx % k) * width); })))))))))
> +
> + /* Simplify a bit extraction from a bit insertion for the cases with
> + the inserted element fully covering the extraction or the insertion
> + not touching the extraction. */
> + (simplify
> + (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
> + (switch
> + (if (wi::leu_p (@ipos, @rpos)
> + && wi::leu_p (wi::add (@rpos, @rsize),
> + wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> + (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
> + wi::sub (@rpos, @ipos)); }))
> + (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
> + || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> + (BIT_FIELD_REF @0 @rsize @rpos))))
> Index: gcc/testsuite/gcc.target/i386/pr81502.c
> ===================================================================
> *** gcc/testsuite/gcc.target/i386/pr81502.c (nonexistent)
> --- gcc/testsuite/gcc.target/i386/pr81502.c (working copy)
> ***************
> *** 0 ****
> --- 1,34 ----
> + /* { dg-do compile { target lp64 } } */
> + /* { dg-options "-O2 -msse2" } */
> +
> + #include <emmintrin.h>
> +
> + #define SIZE (sizeof (void *))
> +
> + static int foo(unsigned char (*foo)[SIZE])
> + {
> + __m128i acc = _mm_set_epi32(0, 0, 0, 0);
> + size_t i = 0;
> + for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
> + __m128i word;
> + __builtin_memcpy(&word, foo + i, sizeof(__m128i));
> + acc = _mm_add_epi32(word, acc);
> + }
> + if (i != SIZE) {
> + __m128i word = _mm_set_epi32(0, 0, 0, 0);
> + __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
> + acc = _mm_add_epi32(word, acc);
> + }
> + int res;
> + __builtin_memcpy(&res, &acc, sizeof(res));
> + return res;
> + }
> +
> + int bar(void *ptr)
> + {
> + unsigned char buf[SIZE];
> + __builtin_memcpy(buf, &ptr, SIZE);
> + return foo((unsigned char(*)[SIZE])buf);
> + }
> +
> + /* { dg-final { scan-assembler-times "mov" 1 } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH][2/2] Fix PR81502
2017-07-27 15:59 ` Andrew Pinski
@ 2017-07-28 11:26 ` Richard Biener
0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2017-07-28 11:26 UTC (permalink / raw)
To: Andrew Pinski; +Cc: GCC Patches
On Thu, 27 Jul 2017, Andrew Pinski wrote:
> On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguenther@suse.de> wrote:
> >
> > I am testing the following additional pattern for match.pd to fix
> > PR81502 resulting in the desired optimization to
> >
> > bar:
> > .LFB526:
> > .cfi_startproc
> > movl %edi, %eax
> > ret
> >
> > the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by
> > either extracting from the destination or the inserted value.
>
> Note this optimization pattern was on my list to implement for
> bit-field optimizations after lowering.
I had to make some adjustments and ended up enforcing bitsizetype operands
for BIT_INSERT_EXPR and BIT_FIELD_REF.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2017-07-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/81502
* match.pd: Add pattern combining BIT_INSERT_EXPR with
BIT_FIELD_REF.
* tree-cfg.c (verify_expr): Verify types of BIT_FIELD_REF
size/pos operands.
(verify_gimple_assign_ternary): Likewise for BIT_INSERT_EXPR pos.
* gimple-fold.c (maybe_canonicalize_mem_ref_addr): Use bitsizetype
for BIT_FIELD_REF args.
* fold-const.c (make_bit_field_ref): Likewise.
* tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise.
* gcc.target/i386/pr81502.c: New testcase.
Index: gcc/testsuite/gcc.target/i386/pr81502.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr81502.c (nonexistent)
--- gcc/testsuite/gcc.target/i386/pr81502.c (working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do compile { target lp64 } } */
+ /* { dg-options "-O2 -msse2" } */
+
+ #include <emmintrin.h>
+
+ #define SIZE (sizeof (void *))
+
+ static int foo(unsigned char (*foo)[SIZE])
+ {
+ __m128i acc = _mm_set_epi32(0, 0, 0, 0);
+ size_t i = 0;
+ for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
+ __m128i word;
+ __builtin_memcpy(&word, foo + i, sizeof(__m128i));
+ acc = _mm_add_epi32(word, acc);
+ }
+ if (i != SIZE) {
+ __m128i word = _mm_set_epi32(0, 0, 0, 0);
+ __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
+ acc = _mm_add_epi32(word, acc);
+ }
+ int res;
+ __builtin_memcpy(&res, &acc, sizeof(res));
+ return res;
+ }
+
+ int bar(void *ptr)
+ {
+ unsigned char buf[SIZE];
+ __builtin_memcpy(buf, &ptr, SIZE);
+ return foo((unsigned char(*)[SIZE])buf);
+ }
+
+ /* { dg-final { scan-assembler-times "mov" 1 } } */
Index: gcc/match.pd
===================================================================
--- gcc/match.pd (revision 250625)
+++ gcc/match.pd (working copy)
@@ -4178,3 +4178,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ CONSTRUCTOR_ELT (ctor, idx / k)->value; })
(BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
@1 { bitsize_int ((idx % k) * width); })))))))))
+
+/* Simplify a bit extraction from a bit insertion for the cases with
+ the inserted element fully covering the extraction or the insertion
+ not touching the extraction. */
+(simplify
+ (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
+ (with
+ {
+ unsigned HOST_WIDE_INT isize;
+ if (INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+ isize = TYPE_PRECISION (TREE_TYPE (@1));
+ else
+ isize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (@1)));
+ }
+ (switch
+ (if (wi::leu_p (@ipos, @rpos)
+ && wi::leu_p (wi::add (@rpos, @rsize), wi::add (@ipos, isize)))
+ (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
+ wi::sub (@rpos, @ipos)); }))
+ (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
+ || wi::geu_p (@rpos, wi::add (@ipos, isize)))
+ (BIT_FIELD_REF @0 @rsize @rpos)))))
Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c (revision 250620)
+++ gcc/gimple-fold.c (working copy)
@@ -4245,7 +4245,7 @@ maybe_canonicalize_mem_ref_addr (tree *t
TREE_TYPE (*t),
TREE_OPERAND (TREE_OPERAND (*t, 0), 0),
TYPE_SIZE (TREE_TYPE (*t)),
- wide_int_to_tree (sizetype, idx));
+ wide_int_to_tree (bitsizetype, idx));
res = true;
}
}
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c (revision 250620)
+++ gcc/tree-cfg.c (working copy)
@@ -3053,7 +3053,9 @@ verify_expr (tree *tp, int *walk_subtree
tree t1 = TREE_OPERAND (t, 1);
tree t2 = TREE_OPERAND (t, 2);
if (!tree_fits_uhwi_p (t1)
- || !tree_fits_uhwi_p (t2))
+ || !tree_fits_uhwi_p (t2)
+ || !types_compatible_p (bitsizetype, TREE_TYPE (t1))
+ || !types_compatible_p (bitsizetype, TREE_TYPE (t2)))
{
error ("invalid position or size operand to BIT_FIELD_REF");
return t;
@@ -4247,6 +4249,7 @@ verify_gimple_assign_ternary (gassign *s
return true;
}
if (! tree_fits_uhwi_p (rhs3)
+ || ! types_compatible_p (bitsizetype, TREE_TYPE (rhs3))
|| ! tree_fits_uhwi_p (TYPE_SIZE (rhs2_type)))
{
error ("invalid position or size in BIT_INSERT_EXPR");
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c (revision 250625)
+++ gcc/fold-const.c (working copy)
@@ -3936,7 +3936,7 @@ make_bit_field_ref (location_t loc, tree
bftype = build_nonstandard_integer_type (bitsize, 0);
result = build3_loc (loc, BIT_FIELD_REF, bftype, inner,
- size_int (bitsize), bitsize_int (bitpos));
+ bitsize_int (bitsize), bitsize_int (bitpos));
REF_REVERSE_STORAGE_ORDER (result) = reversep;
if (bftype != type)
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 250625)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -3531,7 +3531,7 @@ vectorizable_simd_clone_call (gimple *st
arginfo[i].op = vec_oprnd0;
vec_oprnd0
= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
- size_int (prec),
+ bitsize_int (prec),
bitsize_int ((m & (k - 1)) * prec));
new_stmt
= gimple_build_assign (make_ssa_name (atype),
@@ -3692,7 +3692,7 @@ vectorizable_simd_clone_call (gimple *st
}
else
t = build3 (BIT_FIELD_REF, vectype, new_temp,
- size_int (prec), bitsize_int (l * prec));
+ bitsize_int (prec), bitsize_int (l * prec));
new_stmt
= gimple_build_assign (make_ssa_name (vectype), t);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-07-28 11:26 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-27 13:51 [PATCH][2/2] Fix PR81502 Richard Biener
2017-07-27 15:59 ` Andrew Pinski
2017-07-28 11:26 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).