public inbox for gcc-patches@gcc.gnu.org
* [PATCH][RFC] Fix PR61473, inline small memcpy/memmove during tree opts
@ 2014-06-12 10:15 Richard Biener
  2014-06-12 20:32 ` Jeff Law
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2014-06-12 10:15 UTC (permalink / raw)
  To: gcc-patches


This implements the requested inlining of memmove for possibly
overlapping arguments by first doing all loads and then all stores.
The easiest place to do this is in memory op folding, where we already
inline some memcpy cases (but fail to do the equivalent memmove
optimization - though RTL expansion later does it).
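
To illustrate (a sketch of mine, not code from the patch, assuming a
4-byte int and suitably aligned pointers): the folded form of a
4-byte memmove amounts to one full-width load into a temporary
followed by one store, which is correct even for overlapping regions
because the source is read completely before the destination is
written.  The may_alias type stands in for the ref-all accesses the
patch builds; the real code also copes with unaligned pointers.

  typedef unsigned int u32 __attribute__((may_alias));

  void
  move4 (char *dst, const char *src)
  {
    u32 tmp = *(const u32 *) src;	/* all loads first ...  */
    *(u32 *) dst = tmp;			/* ... then all stores.  */
  }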

The following patch restricts us to at most word-mode size.  Ideally
we'd have a way to check the number of real instructions needed
to load an (aligned) value of size N.  But maybe we don't care
and are fine with doing multiple loads / stores?
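
Distilled into plain C (my paraphrase with a made-up helper name, not
the exact GCC code): the guard below only fires when the length is a
nonzero power of two no larger than a word.

  /* Rough equivalent of the size check in the patch.  */
  static int
  inlinable_len_p (unsigned long len, unsigned bits_per_word)
  {
    return len != 0
	   && len <= bits_per_word / 8
	   && (len & (len - 1)) == 0;	/* exact_log2 (len) != -1  */
  }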

Anyway, the following is conservative (but maybe not conservative enough).

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

These transforms don't really belong in GENERIC folding (they
also run at -O0 ...), similar to most builtin foldings.  But this
patch does not try to change that.

Any comments on the size/cost issue?

Thanks,
Richard.

2014-06-12  Richard Biener  <rguenther@suse.de>

	PR middle-end/61473
	* builtins.c (fold_builtin_memory_op): Inline memory moves
	that can be implemented with a single load followed by a
	single store.

	* gcc.dg/memmove-4.c: New testcase.

Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 211449)
+++ gcc/builtins.c	(working copy)
@@ -8637,11 +8637,53 @@ fold_builtin_memory_op (location_t loc,
       unsigned int src_align, dest_align;
       tree off0;
 
-      if (endp == 3)
+      /* Build accesses at offset zero with a ref-all character type.  */
+      off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
+							 ptr_mode, true), 0);
+
+      /* If we can perform the copy efficiently by first doing all loads
+         and then all stores, inline it that way.  Currently "efficiently"
+	 means that we can load all the memory into a single integer
+	 register and thus are limited to word_mode size.  Ideally we'd
+	 have a way to query the largest mode that we can load/store
+	 with a single instruction.  */
+      src_align = get_pointer_alignment (src);
+      dest_align = get_pointer_alignment (dest);
+      if (tree_fits_uhwi_p (len)
+	  && compare_tree_int (len, BITS_PER_WORD / 8) <= 0)
 	{
-	  src_align = get_pointer_alignment (src);
-	  dest_align = get_pointer_alignment (dest);
+	  unsigned ilen = tree_to_uhwi (len);
+	  if (exact_log2 (ilen) != -1)
+	    {
+	      tree type = lang_hooks.types.type_for_size (ilen * 8, 1);
+	      if (type
+		  && TYPE_MODE (type) != BLKmode
+		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
+		      == ilen * 8)
+		  /* If the pointers are not aligned we must be able to
+		     emit an unaligned load.  */
+		  && ((src_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
+		       && dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
+						 MIN (src_align, dest_align))))
+		{
+		  tree srctype = type;
+		  tree desttype = type;
+		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		    srctype = build_aligned_type (type, src_align);
+		  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+		    desttype = build_aligned_type (type, dest_align);
+		  destvar = fold_build2 (MEM_REF, desttype, dest, off0);
+		  expr = build2 (MODIFY_EXPR, type,
+				 destvar,
+				 fold_build2 (MEM_REF, srctype, src, off0));
+		  goto done;
+		}
+	    }
+	}
 
+      if (endp == 3)
+	{
 	  /* Both DEST and SRC must be pointer types.
 	     ??? This is what old code did.  Is the testing for pointer types
 	     really mandatory?
@@ -8818,10 +8860,6 @@ fold_builtin_memory_op (location_t loc,
       if (!ignore)
         dest = builtin_save_expr (dest);
 
-      /* Build accesses at offset zero with a ref-all character type.  */
-      off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
-							 ptr_mode, true), 0);
-
       destvar = dest;
       STRIP_NOPS (destvar);
       if (TREE_CODE (destvar) == ADDR_EXPR
@@ -8888,6 +8926,7 @@ fold_builtin_memory_op (location_t loc,
       expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, srcvar);
     }
 
+done:
   if (ignore)
     return expr;
 
Index: gcc/testsuite/gcc.dg/memmove-4.c
===================================================================
--- gcc/testsuite/gcc.dg/memmove-4.c	(revision 0)
+++ gcc/testsuite/gcc.dg/memmove-4.c	(working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef int w __attribute__((mode(word)));
+
+void b(char *a, char *b, int i)
+{
+  __builtin_memmove (&a[i], &b[i], sizeof(w));
+}
+
+/* { dg-final { scan-tree-dump-not "memmove" "optimized" { xfail { ! non_strict_align } } } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
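
For reference (examples of mine, not part of the patch): on a 64-bit
non-strict-alignment target (x86_64, say, with an 8-byte word_mode)
the new folding would fire for the first call below and leave the
other two to the library call or to later RTL expansion.

  void
  f (char *a, char *b)
  {
    __builtin_memmove (a, b, 8);	/* inlined: power of two, <= word size  */
    __builtin_memmove (a, b, 3);	/* kept: exact_log2 (3) == -1  */
    __builtin_memmove (a, b, 16);	/* kept: larger than word_mode  */
  }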
