public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
@ 2014-06-12 10:15 Richard Biener
  2014-06-12 20:32 ` Jeff Law
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-06-12 10:15 UTC (permalink / raw)
  To: gcc-patches


This implements the requested inlining of memmove for possibly
overlapping arguments by doing first all loads and then all stores.
The easiest place is to do this in memory op folding where we already
perform inlining of some memcpy cases (but fail to do the equivalent
memcpy optimization - though RTL expansion later does it).

The following patch restricts us to max. word-mode size.  Ideally
we'd have a way to check for the number of real instructions needed
to load an (aligned) value of size N.  But maybe we don't care
and are fine with doing multiple loads / stores?

Anyway, the following is conservative (but maybe not enough).

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

These transforms don't really belong to GENERIC folding (they
also run at -O0 ...), similar to most builtin foldings.  But this
patch is not to change that.

Any comments on the size/cost issue?

Thanks,
Richard.

2014-06-12  Richard Biener  <rguenther@suse.de>

	PR middle-end/61473
	* builtins.c (fold_builtin_memory_op): Inline memory moves
	that can be implemented with a single load followed by a
	single store.

	* gcc.dg/memmove-4.c: New testcase.

Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 211449)
+++ gcc/builtins.c	(working copy)
@@ -8637,11 +8637,53 @@ fold_builtin_memory_op (location_t loc,
       unsigned int src_align, dest_align;
       tree off0;
 
-      if (endp == 3)
+      /* Build accesses at offset zero with a ref-all character type.  */
+      off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
+							 ptr_mode, true), 0);
+
+      /* If we can perform the copy efficiently with first doing all loads
+         and then all stores inline it that way.  Currently efficiently
+	 means that we can load all the memory into a single integer
+	 register and thus limited to word_mode size.  Ideally we'd have
+	 a way to query the largest mode that we can load/store with
+	 a signle instruction.  */
+      src_align = get_pointer_alignment (src);
+      dest_align = get_pointer_alignment (dest);
+      if (tree_fits_uhwi_p (len)
+	  && compare_tree_int (len, BITS_PER_WORD / 8) <= 0)
 	{
-	  src_align = get_pointer_alignment (src);
-	  dest_align = get_pointer_alignment (dest);
+	  unsigned ilen = tree_to_uhwi (len);
+	  if (exact_log2 (ilen) != -1)
+	    {
+	      tree type = lang_hooks.types.type_for_size (ilen * 8, 1);
+	      if (type
+		  && TYPE_MODE (type) != BLKmode
+		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
+		      == ilen * 8)
+		  /* If the pointers are not aligned we must be able to
+		     emit an unaligned load.  */
+		  && ((src_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
+		       && dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
+						 MIN (src_align, dest_align))))
+		{
+		  tree srctype = type;
+		  tree desttype = type;
+		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		    srctype = build_aligned_type (type, src_align);
+		  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		    desttype = build_aligned_type (type, dest_align);
+		  destvar = fold_build2 (MEM_REF, desttype, dest, off0);
+		  expr = build2 (MODIFY_EXPR, type,
+				 fold_build2 (MEM_REF, desttype, dest, off0),
+				 fold_build2 (MEM_REF, srctype, src, off0));
+		  goto done;
+		}
+	    }
+	}
 
+      if (endp == 3)
+	{
 	  /* Both DEST and SRC must be pointer types.
 	     ??? This is what old code did.  Is the testing for pointer types
 	     really mandatory?
@@ -8818,10 +8860,6 @@ fold_builtin_memory_op (location_t loc,
       if (!ignore)
         dest = builtin_save_expr (dest);
 
-      /* Build accesses at offset zero with a ref-all character type.  */
-      off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
-							 ptr_mode, true), 0);
-
       destvar = dest;
       STRIP_NOPS (destvar);
       if (TREE_CODE (destvar) == ADDR_EXPR
@@ -8888,6 +8926,7 @@ fold_builtin_memory_op (location_t loc,
       expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, srcvar);
     }
 
+done:
   if (ignore)
     return expr;
 
Index: gcc/testsuite/gcc.dg/memmove-4.c
===================================================================
--- gcc/testsuite/gcc.dg/memmove-4.c	(revision 0)
+++ gcc/testsuite/gcc.dg/memmove-4.c	(working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef int w __attribute__((mode(word)));
+
+void b(char *a, char *b, int i)
+{
+  __builtin_memmove (&a[i], &b[i], sizeof(w));
+}
+
+/* { dg-final { scan-tree-dump-not "memmove" "optimized" { xfail { ! non_strict_align } } } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-12 10:15 [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts Richard Biener
@ 2014-06-12 20:32 ` Jeff Law
  2014-06-27 11:52   ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Jeff Law @ 2014-06-12 20:32 UTC (permalink / raw)
  To: Richard Biener, gcc-patches

On 06/12/14 04:12, Richard Biener wrote:
>
> This implements the requested inlining of memmove for possibly
> overlapping arguments by doing first all loads and then all stores.
> The easiest place is to do this in memory op folding where we already
> perform inlining of some memcpy cases (but fail to do the equivalent
> memcpy optimization - though RTL expansion later does it).
>
> The following patch restricts us to max. word-mode size.  Ideally
> we'd have a way to check for the number of real instructions needed
> to load an (aligned) value of size N.  But maybe we don't care
> and are fine with doing multiple loads / stores?
>
> Anyway, the following is conservative (but maybe not enough).
>
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
>
> These transforms don't really belong to GENERIC folding (they
> also run at -O0 ...), similar to most builtin foldings.  But this
> patch is not to change that.
>
> Any comments on the size/cost issue?
I recall seeing something in one of the BZ databases that asked for 
double-word to be expanded inline.  Presumably the reporter's code did 
lots of double-word things of this nature.

Obviously someone else might want quad-word and so-on.  However, double 
words seem like a very reasonable request.

jeff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-12 20:32 ` Jeff Law
@ 2014-06-27 11:52   ` Richard Biener
  2014-06-27 11:57     ` Jakub Jelinek
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-06-27 11:52 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

On Thu, 12 Jun 2014, Jeff Law wrote:

> On 06/12/14 04:12, Richard Biener wrote:
> > 
> > This implements the requested inlining of memmove for possibly
> > overlapping arguments by doing first all loads and then all stores.
> > The easiest place is to do this in memory op folding where we already
> > perform inlining of some memcpy cases (but fail to do the equivalent
> > memcpy optimization - though RTL expansion later does it).
> > 
> > The following patch restricts us to max. word-mode size.  Ideally
> > we'd have a way to check for the number of real instructions needed
> > to load an (aligned) value of size N.  But maybe we don't care
> > and are fine with doing multiple loads / stores?
> > 
> > Anyway, the following is conservative (but maybe not enough).
> > 
> > Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> > 
> > These transforms don't really belong to GENERIC folding (they
> > also run at -O0 ...), similar to most builtin foldings.  But this
> > patch is not to change that.
> > 
> > Any comments on the size/cost issue?
> I recall seeing something in one of the BZ databases that asked for 
> double-word to be expanded inline.  Presumably the reporter's code did 
> lots of double-word things of this nature.
> 
> Obviously someone else might want quad-word and so-on.  However, double 
> words seem like a very reasonable request.

Hmm, yeah.  Meanwhile I found the MOVE_MAX target macro which gives
me what I was looking for (max memory-reg move size with a single
instruction).  Eventually increasing the maximum number of
loads/stores to two also allows handling of (some) non-power-of-two sizes
and some more cases of unaligned accesses on SLOW_UNALIGNED_ACCESS
targets.  But then we're re-implementing move_by_pieces on GIMPLE ... ;)
(maybe not totally unreasonable?).

I'm going to go for a single load/store and MOVE_MAX for now - I
have quite some fallout to deal with anyway (analyzed strlenopt-1.c
FAIL only, the other strlenopt cases are probably similar)

FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "strlen \\\\(" 2
FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "memcpy \\\\(" 4

we generate (note unfolded read from "/"!)

  _15 = MEM[(char * {ref-all})"/"];
  MEM[(char * {ref-all})_14] = _15;

where strlenopt doesn't catch (short *)_14 = (short)"/\0" (maybe too
much asked, given endianess and so ...).

FAIL: gcc.dg/strlenopt-10.c scan-tree-dump-times strlen "strlen \\\\(" 2
FAIL: gcc.dg/strlenopt-10.c scan-tree-dump-times strlen "memcpy \\\\(" 8
FAIL: gcc.dg/strlenopt-10.c scan-tree-dump-times strlen "strchr \\\\(" 0
FAIL: gcc.dg/strlenopt-10.c scan-tree-dump-times strlen "memcpy 
\\\\([^\\n\\r]*,
 1\\\\)" 1
FAIL: gcc.dg/strlenopt-18g.c scan-tree-dump-times strlen "memcpy \\\\(" 4
FAIL: gcc.dg/strlenopt-19.c scan-tree-dump-times strlen "memcpy \\\\(" 6
FAIL: gcc.dg/strlenopt-1f.c scan-tree-dump-times strlen "strlen \\\\(" 2
FAIL: gcc.dg/strlenopt-1f.c scan-tree-dump-times strlen "memcpy \\\\(" 4
FAIL: gcc.dg/strlenopt-2.c scan-tree-dump-times strlen "strlen \\\\(" 2
FAIL: gcc.dg/strlenopt-2.c scan-tree-dump-times strlen "memcpy \\\\(" 5
FAIL: gcc.dg/strlenopt-20.c scan-tree-dump-times strlen "memcpy \\\\(" 4
FAIL: gcc.dg/strlenopt-21.c scan-tree-dump-times strlen "memcpy \\\\(" 3
FAIL: gcc.dg/strlenopt-6.c scan-tree-dump-times strlen "memcpy \\\\(" 7
FAIL: gcc.dg/strlenopt-6.c scan-tree-dump-times strlen "strchr \\\\(" 0
FAIL: gcc.dg/strlenopt-7.c scan-tree-dump-times strlen "strlen \\\\(" 0
FAIL: gcc.dg/strlenopt-7.c scan-tree-dump-times strlen "memcpy \\\\(" 2
FAIL: gcc.dg/strlenopt-7.c scan-tree-dump-times optimized "return 3;" 1
FAIL: gcc.dg/strlenopt-8.c scan-tree-dump-times strlen "strlen \\\\(" 0
FAIL: gcc.dg/strlenopt-8.c scan-tree-dump-times strlen "memcpy \\\\(" 4
FAIL: gcc.dg/strlenopt-9.c scan-tree-dump-times strlen "memcpy \\\\(" 6

FAIL: gfortran.dg/coarray_lib_realloc_1.f90  -O   scan-tree-dump-times 
original
"__builtin_memcpy" 2

Both memcpy calls are now plain assignments.  The testcase needs
changing to test what it really wanted to test ...

FAIL: gfortran.dg/pr45636.f90  -O   scan-tree-dump-times forwprop2 
"memset" 0

This shows that funny DSE patterns in forwprop no longer work
when the store is not a memcpy but a plain assignment.  Of course
DSE also doesn't remove dead memset()s.  The testcase also shows
that we could/should handle memset() in the same way if it only
requires a single store from a constant.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-27 11:52   ` Richard Biener
@ 2014-06-27 11:57     ` Jakub Jelinek
  2014-06-27 12:01       ` Richard Biener
  2014-06-27 15:44       ` Jeff Law
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Jelinek @ 2014-06-27 11:57 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Fri, Jun 27, 2014 at 01:49:38PM +0200, Richard Biener wrote:
> I'm going to go for a single load/store and MOVE_MAX for now - I
> have quite some fallout to deal with anyway (analyzed strlenopt-1.c
> FAIL only, the other strlenopt cases are probably similar)
> 
> FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "strlen \\\\(" 2
> FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "memcpy \\\\(" 4

But is it really desirable to do this kind of expansion so early on GIMPLE?
Doing it during folding is e.g. very much harmful to offloading, the
offloading target might have different preferences from the host.
So, if it is really beneficial (do you have some benchmark that shows
that?), can it be done e.g. in the strlen pass or even somewhat later?

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-27 11:57     ` Jakub Jelinek
@ 2014-06-27 12:01       ` Richard Biener
  2014-07-10 14:33         ` Richard Biener
  2014-06-27 15:44       ` Jeff Law
  1 sibling, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-06-27 12:01 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Fri, 27 Jun 2014, Jakub Jelinek wrote:

> On Fri, Jun 27, 2014 at 01:49:38PM +0200, Richard Biener wrote:
> > I'm going to go for a single load/store and MOVE_MAX for now - I
> > have quite some fallout to deal with anyway (analyzed strlenopt-1.c
> > FAIL only, the other strlenopt cases are probably similar)
> > 
> > FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "strlen \\\\(" 2
> > FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "memcpy \\\\(" 4
> 
> But is it really desirable to do this kind of expansion so early on GIMPLE?
> Doing it during folding is e.g. very much harmful to offloading, the
> offloading target might have different preferences from the host.
> So, if it is really beneficial (do you have some benchmark that shows
> that?), can it be done e.g. in the strlen pass or even somewhat later?

strlen pass now runs very very late, for PR61619 it is important
before some constant propagation happening before vectorization.

But yes, it's not necessary doing that on GENERIC (nor is any of
those foldings - but as said, it's not the objective of the patch
to change where we do such optimizations).

Oh, and offloading targets always will have the issue facing
IL optimized for another target (LOGICAL_OP_NON_SHORT_CIRCUIT
for example).

Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-27 11:57     ` Jakub Jelinek
  2014-06-27 12:01       ` Richard Biener
@ 2014-06-27 15:44       ` Jeff Law
  1 sibling, 0 replies; 13+ messages in thread
From: Jeff Law @ 2014-06-27 15:44 UTC (permalink / raw)
  To: Jakub Jelinek, Richard Biener; +Cc: gcc-patches

On 06/27/14 05:56, Jakub Jelinek wrote:
> On Fri, Jun 27, 2014 at 01:49:38PM +0200, Richard Biener wrote:
>> I'm going to go for a single load/store and MOVE_MAX for now - I
>> have quite some fallout to deal with anyway (analyzed strlenopt-1.c
>> FAIL only, the other strlenopt cases are probably similar)
>>
>> FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "strlen \\\\(" 2
>> FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "memcpy \\\\(" 4
>
> But is it really desirable to do this kind of expansion so early on GIMPLE?
> Doing it during folding is e.g. very much harmful to offloading, the
> offloading target might have different preferences from the host.
> So, if it is really beneficial (do you have some benchmark that shows
> that?), can it be done e.g. in the strlen pass or even somewhat later?
You want to do it early enough in the pipeline to expose those to the 
bulk of the gimple optimizers.  My recollection was that was the biggest 
win for the customer case I looked at -- suddenly we can expose those 
objects to CSE, DSE & friends and lots of crud just starts to go away.

Jeff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-06-27 12:01       ` Richard Biener
@ 2014-07-10 14:33         ` Richard Biener
  2014-07-10 15:13           ` Jakub Jelinek
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-07-10 14:33 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Fri, 27 Jun 2014, Richard Biener wrote:

> On Fri, 27 Jun 2014, Jakub Jelinek wrote:
> 
> > On Fri, Jun 27, 2014 at 01:49:38PM +0200, Richard Biener wrote:
> > > I'm going to go for a single load/store and MOVE_MAX for now - I
> > > have quite some fallout to deal with anyway (analyzed strlenopt-1.c
> > > FAIL only, the other strlenopt cases are probably similar)
> > > 
> > > FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "strlen \\\\(" 2
> > > FAIL: gcc.dg/strlenopt-1.c scan-tree-dump-times strlen "memcpy \\\\(" 4
> > 
> > But is it really desirable to do this kind of expansion so early on GIMPLE?
> > Doing it during folding is e.g. very much harmful to offloading, the
> > offloading target might have different preferences from the host.
> > So, if it is really beneficial (do you have some benchmark that shows
> > that?), can it be done e.g. in the strlen pass or even somewhat later?
> 
> strlen pass now runs very very late, for PR61619 it is important
> before some constant propagation happening before vectorization.
> 
> But yes, it's not necessary doing that on GENERIC (nor is any of
> those foldings - but as said, it's not the objective of the patch
> to change where we do such optimizations).
> 
> Oh, and offloading targets always will have the issue facing
> IL optimized for another target (LOGICAL_OP_NON_SHORT_CIRCUIT
> for example).

Compromise "hack" below.  It simply avoids the transform for
sources that c_strlen can compute a length of.  That "fixes" all
strlenopt testcase apart from strlenopt-8.c which does
memcpy (, flag ? "a" : "b"); which then still folds
during gimplification.  I have XFAILed that.

I've tried to comb my way through strlenopt but failed to quickly
add support for generic stores (it has very rough support for
char stores, see also PR61773).

Does the below look ok?

Thanks,
Richard.

2014-07-10  Richard Biener  <rguenther@suse.de>

	PR middle-end/61473
	* builtins.c (fold_builtin_memory_op): Inline memory moves
	that can be implemented with a single load followed by a
	single store.

	* gcc.dg/memmove-4.c: New testcase.
	* gcc.dg/strlenopt-8.c: XFAIL.
	* gfortran.dg/coarray_lib_realloc_1.f90: Adjust.

Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c.orig	2014-07-10 15:34:27.787477771 +0200
--- gcc/builtins.c	2014-07-10 16:20:08.071289106 +0200
*************** fold_builtin_memory_op (location_t loc,
*** 8637,8647 ****
        unsigned int src_align, dest_align;
        tree off0;
  
!       if (endp == 3)
  	{
! 	  src_align = get_pointer_alignment (src);
! 	  dest_align = get_pointer_alignment (dest);
  
  	  /* Both DEST and SRC must be pointer types.
  	     ??? This is what old code did.  Is the testing for pointer types
  	     really mandatory?
--- 8637,8693 ----
        unsigned int src_align, dest_align;
        tree off0;
  
!       /* Build accesses at offset zero with a ref-all character type.  */
!       off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
! 							 ptr_mode, true), 0);
! 
!       /* If we can perform the copy efficiently with first doing all loads
!          and then all stores inline it that way.  Currently efficiently
! 	 means that we can load all the memory into a single integer
! 	 register which is what MOVE_MAX gives us.  */
!       src_align = get_pointer_alignment (src);
!       dest_align = get_pointer_alignment (dest);
!       if (tree_fits_uhwi_p (len)
! 	  && compare_tree_int (len, MOVE_MAX) <= 0
! 	  /* ???  Don't transform copies from strings with known length this
! 	     confuses the tree-ssa-strlen.c.  This doesn't handle
! 	     the case in gcc.dg/strlenopt-8.c which is XFAILed for that
! 	     reason.  */
! 	  && !c_strlen (src, 1))
  	{
! 	  unsigned ilen = tree_to_uhwi (len);
! 	  if (exact_log2 (ilen) != -1)
! 	    {
! 	      tree type = lang_hooks.types.type_for_size (ilen * 8, 1);
! 	      if (type
! 		  && TYPE_MODE (type) != BLKmode
! 		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8)
! 		  /* If the pointers are not aligned we must be able to
! 		     emit an unaligned load.  */
! 		  && ((src_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 		       && dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
! 						 MIN (src_align, dest_align))))
! 		{
! 		  tree srctype = type;
! 		  tree desttype = type;
! 		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		    srctype = build_aligned_type (type, src_align);
! 		  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		    desttype = build_aligned_type (type, dest_align);
! 		  if (!ignore)
! 		    dest = builtin_save_expr (dest);
! 		  expr = build2 (MODIFY_EXPR, type,
! 				 fold_build2 (MEM_REF, desttype, dest, off0),
! 				 fold_build2 (MEM_REF, srctype, src, off0));
! 		  goto done;
! 		}
! 	    }
! 	}
  
+       if (endp == 3)
+ 	{
  	  /* Both DEST and SRC must be pointer types.
  	     ??? This is what old code did.  Is the testing for pointer types
  	     really mandatory?
*************** fold_builtin_memory_op (location_t loc,
*** 8818,8827 ****
        if (!ignore)
          dest = builtin_save_expr (dest);
  
-       /* Build accesses at offset zero with a ref-all character type.  */
-       off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
- 							 ptr_mode, true), 0);
- 
        destvar = dest;
        STRIP_NOPS (destvar);
        if (TREE_CODE (destvar) == ADDR_EXPR
--- 8864,8869 ----
*************** fold_builtin_memory_op (location_t loc,
*** 8888,8893 ****
--- 8930,8936 ----
        expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, srcvar);
      }
  
+ done:
    if (ignore)
      return expr;
  
Index: gcc/testsuite/gcc.dg/memmove-4.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/memmove-4.c	2014-07-10 16:09:31.035332965 +0200
***************
*** 0 ****
--- 1,12 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -fdump-tree-optimized" } */
+ 
+ typedef int w __attribute__((mode(word)));
+ 
+ void b(char *a, char *b, int i)
+ {
+   __builtin_memmove (&a[i], &b[i], sizeof(w));
+ }
+ 
+ /* { dg-final { scan-tree-dump-not "memmove" "optimized" { xfail { ! non_strict_align } } } } */
+ /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-8.c
===================================================================
*** gcc/testsuite/gcc.dg/strlenopt-8.c.orig	2011-09-28 10:16:34.000000000 +0200
--- gcc/testsuite/gcc.dg/strlenopt-8.c	2014-07-10 16:21:26.501283706 +0200
*************** main ()
*** 43,50 ****
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
--- 43,50 ----
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" { xfail *-*-* } } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" { xfail *-*-* } } } */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
Index: gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90
===================================================================
*** gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90.orig	2013-08-27 11:13:18.873996208 +0200
--- gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90	2014-07-10 16:24:00.310273117 +0200
*************** end
*** 30,35 ****
  ! { dg-final { scan-tree-dump-times "__builtin_malloc" 1 "original" } }
  
  ! But copy "ii" and "CAF":
! ! { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "original" } }
  
  ! { dg-final { cleanup-tree-dump "original" } }
--- 30,35 ----
  ! { dg-final { scan-tree-dump-times "__builtin_malloc" 1 "original" } }
  
  ! But copy "ii" and "CAF":
! ! { dg-final { scan-tree-dump-times "__builtin_memcpy|= MEM" 2 "original" } }
  
  ! { dg-final { cleanup-tree-dump "original" } }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-10 14:33         ` Richard Biener
@ 2014-07-10 15:13           ` Jakub Jelinek
  2014-07-11  8:03             ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2014-07-10 15:13 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Thu, Jul 10, 2014 at 04:30:13PM +0200, Richard Biener wrote:
> Compromise "hack" below.  It simply avoids the transform for
> sources that c_strlen can compute a length of.  That "fixes" all
> strlenopt testcase apart from strlenopt-8.c which does
> memcpy (, flag ? "a" : "b"); which then still folds
> during gimplification.  I have XFAILed that.
> 
> I've tried to comb my way through strlenopt but failed to quickly
> add support for generic stores (it has very rough support for
> char stores, see also PR61773).
> 
> Does the below look ok?

I can look at the tree-ssa-strlen.c stuff and removing the !c_strlen
hack incrementally.

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-10 15:13           ` Jakub Jelinek
@ 2014-07-11  8:03             ` Richard Biener
  2014-07-11 13:39               ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-07-11  8:03 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Thu, 10 Jul 2014, Jakub Jelinek wrote:

> On Thu, Jul 10, 2014 at 04:30:13PM +0200, Richard Biener wrote:
> > Compromise "hack" below.  It simply avoids the transform for
> > sources that c_strlen can compute a length of.  That "fixes" all
> > strlenopt testcase apart from strlenopt-8.c which does
> > memcpy (, flag ? "a" : "b"); which then still folds
> > during gimplification.  I have XFAILed that.
> > 
> > I've tried to comb my way through strlenopt but failed to quickly
> > add support for generic stores (it has very rough support for
> > char stores, see also PR61773).
> > 
> > Does the below look ok?
> 
> I can look at the tree-ssa-strlen.c stuff and removing the !c_strlen
> hack incrementally.

Ok, I'll bootstrap/test the patch then and will commit if there are
no issues left.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-11  8:03             ` Richard Biener
@ 2014-07-11 13:39               ` Richard Biener
  2014-07-11 13:42                 ` Jakub Jelinek
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-07-11 13:39 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Fri, 11 Jul 2014, Richard Biener wrote:

> On Thu, 10 Jul 2014, Jakub Jelinek wrote:
> 
> > On Thu, Jul 10, 2014 at 04:30:13PM +0200, Richard Biener wrote:
> > > Compromise "hack" below.  It simply avoids the transform for
> > > sources that c_strlen can compute a length of.  That "fixes" all
> > > strlenopt testcase apart from strlenopt-8.c which does
> > > memcpy (, flag ? "a" : "b"); which then still folds
> > > during gimplification.  I have XFAILed that.
> > > 
> > > I've tried to comb my way through strlenopt but failed to quickly
> > > add support for generic stores (it has very rough support for
> > > char stores, see also PR61773).
> > > 
> > > Does the below look ok?
> > 
> > I can look at the tree-ssa-strlen.c stuff and removing the !c_strlen
> > hack incrementally.
> 
> Ok, I'll bootstrap/test the patch then and will commit if there are
> no issues left.

This is what I have applied.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2014-07-11  Richard Biener  <rguenther@suse.de>

	PR middle-end/61473
	* builtins.c (fold_builtin_memory_op): Inline memory moves
	that can be implemented with a single load followed by a
	single store.
	(c_strlen): Only warn when only_value is not 2.

	* gcc.dg/memmove-4.c: New testcase.
	* gcc.dg/strlenopt-8.c: XFAIL.
	* gfortran.dg/coarray_lib_realloc_1.f90: Adjust.

Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c.orig	2014-07-11 09:54:49.844932232 +0200
--- gcc/builtins.c	2014-07-11 13:15:35.297102917 +0200
*************** get_pointer_alignment (tree exp)
*** 535,540 ****
--- 535,544 ----
     len = c_strlen (src, 1); if (len) expand_expr (len, ...); would not
     evaluate the side-effects.
  
+    If ONLY_VALUE is two then we do not emit warnings about out-of-bound
+    accesses.  Note that this implies the result is not going to be emitted
+    into the instruction stream.
+ 
     The value returned is of type `ssizetype'.
  
     Unfortunately, string_constant can't access the values of const char
*************** c_strlen (tree src, int only_value)
*** 606,612 ****
  
    /* If the offset is known to be out of bounds, warn, and call strlen at
       runtime.  */
!   if (offset < 0 || offset > max)
      {
       /* Suppress multiple warnings for propagated constant strings.  */
        if (! TREE_NO_WARNING (src))
--- 610,617 ----
  
    /* If the offset is known to be out of bounds, warn, and call strlen at
       runtime.  */
!   if (only_value != 2
!       && (offset < 0 || offset > max))
      {
       /* Suppress multiple warnings for propagated constant strings.  */
        if (! TREE_NO_WARNING (src))
*************** fold_builtin_memory_op (location_t loc,
*** 8637,8647 ****
        unsigned int src_align, dest_align;
        tree off0;
  
!       if (endp == 3)
  	{
! 	  src_align = get_pointer_alignment (src);
! 	  dest_align = get_pointer_alignment (dest);
  
  	  /* Both DEST and SRC must be pointer types.
  	     ??? This is what old code did.  Is the testing for pointer types
  	     really mandatory?
--- 8642,8698 ----
        unsigned int src_align, dest_align;
        tree off0;
  
!       /* Build accesses at offset zero with a ref-all character type.  */
!       off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
! 							 ptr_mode, true), 0);
! 
!       /* If we can perform the copy efficiently with first doing all loads
!          and then all stores inline it that way.  Currently efficiently
! 	 means that we can load all the memory into a single integer
! 	 register which is what MOVE_MAX gives us.  */
!       src_align = get_pointer_alignment (src);
!       dest_align = get_pointer_alignment (dest);
!       if (tree_fits_uhwi_p (len)
! 	  && compare_tree_int (len, MOVE_MAX) <= 0
! 	  /* ???  Don't transform copies from strings with known length this
! 	     confuses the tree-ssa-strlen.c.  This doesn't handle
! 	     the case in gcc.dg/strlenopt-8.c which is XFAILed for that
! 	     reason.  */
! 	  && !c_strlen (src, 2))
  	{
! 	  unsigned ilen = tree_to_uhwi (len);
! 	  if (exact_log2 (ilen) != -1)
! 	    {
! 	      tree type = lang_hooks.types.type_for_size (ilen * 8, 1);
! 	      if (type
! 		  && TYPE_MODE (type) != BLKmode
! 		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8)
! 		  /* If the pointers are not aligned we must be able to
! 		     emit an unaligned load.  */
! 		  && ((src_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 		       && dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
! 						 MIN (src_align, dest_align))))
! 		{
! 		  tree srctype = type;
! 		  tree desttype = type;
! 		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		    srctype = build_aligned_type (type, src_align);
! 		  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 		    desttype = build_aligned_type (type, dest_align);
! 		  if (!ignore)
! 		    dest = builtin_save_expr (dest);
! 		  expr = build2 (MODIFY_EXPR, type,
! 				 fold_build2 (MEM_REF, desttype, dest, off0),
! 				 fold_build2 (MEM_REF, srctype, src, off0));
! 		  goto done;
! 		}
! 	    }
! 	}
  
+       if (endp == 3)
+ 	{
  	  /* Both DEST and SRC must be pointer types.
  	     ??? This is what old code did.  Is the testing for pointer types
  	     really mandatory?
*************** fold_builtin_memory_op (location_t loc,
*** 8818,8827 ****
        if (!ignore)
          dest = builtin_save_expr (dest);
  
-       /* Build accesses at offset zero with a ref-all character type.  */
-       off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
- 							 ptr_mode, true), 0);
- 
        destvar = dest;
        STRIP_NOPS (destvar);
        if (TREE_CODE (destvar) == ADDR_EXPR
--- 8869,8874 ----
*************** fold_builtin_memory_op (location_t loc,
*** 8888,8893 ****
--- 8935,8941 ----
        expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, srcvar);
      }
  
+ done:
    if (ignore)
      return expr;
  
Index: gcc/testsuite/gcc.dg/memmove-4.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/memmove-4.c	2014-07-11 12:34:10.168274016 +0200
***************
*** 0 ****
--- 1,12 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -fdump-tree-optimized" } */
+ 
+ typedef int w __attribute__((mode(word)));
+ 
+ void b(char *a, char *b, int i)
+ {
+   __builtin_memmove (&a[i], &b[i], sizeof(w));
+ }
+ 
+ /* { dg-final { scan-tree-dump-not "memmove" "optimized" { xfail { ! non_strict_align } } } } */
+ /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-8.c
===================================================================
*** gcc/testsuite/gcc.dg/strlenopt-8.c.orig	2014-07-11 09:54:49.844932232 +0200
--- gcc/testsuite/gcc.dg/strlenopt-8.c	2014-07-11 12:34:10.168274016 +0200
*************** main ()
*** 43,50 ****
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
--- 43,50 ----
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" { xfail *-*-* } } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" { xfail *-*-* } } } */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
Index: gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90
===================================================================
*** gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90.orig	2014-07-11 09:54:49.844932232 +0200
--- gcc/testsuite/gfortran.dg/coarray_lib_realloc_1.f90	2014-07-11 12:34:10.178274015 +0200
*************** end
*** 30,35 ****
  ! { dg-final { scan-tree-dump-times "__builtin_malloc" 1 "original" } }
  
  ! But copy "ii" and "CAF":
! ! { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "original" } }
  
  ! { dg-final { cleanup-tree-dump "original" } }
--- 30,35 ----
  ! { dg-final { scan-tree-dump-times "__builtin_malloc" 1 "original" } }
  
  ! But copy "ii" and "CAF":
! ! { dg-final { scan-tree-dump-times "__builtin_memcpy|= MEM" 2 "original" } }
  
  ! { dg-final { cleanup-tree-dump "original" } }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-11 13:39               ` Richard Biener
@ 2014-07-11 13:42                 ` Jakub Jelinek
  2014-07-14  7:54                   ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2014-07-11 13:42 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Fri, Jul 11, 2014 at 03:36:15PM +0200, Richard Biener wrote:
> *************** c_strlen (tree src, int only_value)
> *** 606,612 ****
>   
>     /* If the offset is known to be out of bounds, warn, and call strlen at
>        runtime.  */
> !   if (offset < 0 || offset > max)
>       {
>        /* Suppress multiple warnings for propagated constant strings.  */
>         if (! TREE_NO_WARNING (src))
> --- 610,617 ----
>   
>     /* If the offset is known to be out of bounds, warn, and call strlen at
>        runtime.  */
> !   if (only_value != 2
> !       && (offset < 0 || offset > max))
>       {
>        /* Suppress multiple warnings for propagated constant strings.  */
>         if (! TREE_NO_WARNING (src))

This looks wrong.  I'd say you only want to disable the warning for
only_value != 2, but still return NULL_TREE, otherwise the strlen call will
be out of bounds in the compiler.  So move only_value != 2 down to the
second if ?

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-11 13:42                 ` Jakub Jelinek
@ 2014-07-14  7:54                   ` Richard Biener
  2014-07-14 11:12                     ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-07-14  7:54 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Fri, 11 Jul 2014, Jakub Jelinek wrote:

> On Fri, Jul 11, 2014 at 03:36:15PM +0200, Richard Biener wrote:
> > *************** c_strlen (tree src, int only_value)
> > *** 606,612 ****
> >   
> >     /* If the offset is known to be out of bounds, warn, and call strlen at
> >        runtime.  */
> > !   if (offset < 0 || offset > max)
> >       {
> >        /* Suppress multiple warnings for propagated constant strings.  */
> >         if (! TREE_NO_WARNING (src))
> > --- 610,617 ----
> >   
> >     /* If the offset is known to be out of bounds, warn, and call strlen at
> >        runtime.  */
> > !   if (only_value != 2
> > !       && (offset < 0 || offset > max))
> >       {
> >        /* Suppress multiple warnings for propagated constant strings.  */
> >         if (! TREE_NO_WARNING (src))
> 
> This looks wrong.  I'd say you only want to disable the warning for
> only_value != 2, but still return NULL_TREE, otherwise the strlen call will
> be out of bounds in the compiler.  So move only_value != 2 down to the
> second if ?

Hmm, yeah.  Probably doesn't matter for this use but I'm testing the
obvious fix.

Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
  2014-07-14  7:54                   ` Richard Biener
@ 2014-07-14 11:12                     ` Richard Biener
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Biener @ 2014-07-14 11:12 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Mon, 14 Jul 2014, Richard Biener wrote:

> On Fri, 11 Jul 2014, Jakub Jelinek wrote:
> 
> > On Fri, Jul 11, 2014 at 03:36:15PM +0200, Richard Biener wrote:
> > > *************** c_strlen (tree src, int only_value)
> > > *** 606,612 ****
> > >   
> > >     /* If the offset is known to be out of bounds, warn, and call strlen at
> > >        runtime.  */
> > > !   if (offset < 0 || offset > max)
> > >       {
> > >        /* Suppress multiple warnings for propagated constant strings.  */
> > >         if (! TREE_NO_WARNING (src))
> > > --- 610,617 ----
> > >   
> > >     /* If the offset is known to be out of bounds, warn, and call strlen at
> > >        runtime.  */
> > > !   if (only_value != 2
> > > !       && (offset < 0 || offset > max))
> > >       {
> > >        /* Suppress multiple warnings for propagated constant strings.  */
> > >         if (! TREE_NO_WARNING (src))
> > 
> > This looks wrong.  I'd say you only want to disable the warning for
> > only_value != 2, but still return NULL_TREE, otherwise the strlen call will
> > be out of bounds in the compiler.  So move only_value != 2 down to the
> > second if ?
> 
> Hmm, yeah.  Probably doesn't matter for this use but I'm testing the
> obvious fix.

Fixed like so, bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2014-07-14  Richard Biener  <rguenther@suse.de>

	* builtins.c (c_strlen): Make only_value == 2 really only
	affect warning generation.

Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 212513)
+++ gcc/builtins.c	(working copy)
@@ -610,11 +610,11 @@ c_strlen (tree src, int only_value)
 
   /* If the offset is known to be out of bounds, warn, and call strlen at
      runtime.  */
-  if (only_value != 2
-      && (offset < 0 || offset > max))
+  if (offset < 0 || offset > max)
     {
      /* Suppress multiple warnings for propagated constant strings.  */
-      if (! TREE_NO_WARNING (src))
+      if (only_value != 2
+	  && !TREE_NO_WARNING (src))
         {
           warning_at (loc, 0, "offset outside bounds of constant string");
           TREE_NO_WARNING (src) = 1;

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-07-14 11:12 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-12 10:15 [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts Richard Biener
2014-06-12 20:32 ` Jeff Law
2014-06-27 11:52   ` Richard Biener
2014-06-27 11:57     ` Jakub Jelinek
2014-06-27 12:01       ` Richard Biener
2014-07-10 14:33         ` Richard Biener
2014-07-10 15:13           ` Jakub Jelinek
2014-07-11  8:03             ` Richard Biener
2014-07-11 13:39               ` Richard Biener
2014-07-11 13:42                 ` Jakub Jelinek
2014-07-14  7:54                   ` Richard Biener
2014-07-14 11:12                     ` Richard Biener
2014-06-27 15:44       ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).