public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848)
@ 2007-11-05  0:12 Jakub Jelinek
  2007-11-15 19:49 ` Diego Novillo
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2007-11-05  0:12 UTC (permalink / raw)
  To: gcc-patches

Hi!

This patch optimizes out unnecessary __builtin_stack_{save,restore} pairs,
which often frees up one call-saved register (or stack slot) for each level
of VLA nesting.  It applies to VLAs that go out of scope at the end of the
function (they then work just like normal alloca), or when there are neither
function calls nor inline asm between leaving their scope and function exit
(or another __builtin_stack_restore).  This is just a simplistic
optimization, but it should always cover VLAs in the function's outermost
scope (which really are supposed to work like alloca) and hit quite often in
other cases as well.
At least for 4.3 I don't think we need to make this a whole new pass which
will try harder to optimize by walking possibly many BBs, checking if there
could be function calls or inline asm in between the stack restore and
return (resp. another stack restore) and if there aren't back edges.
For example, for foo2 in the attached pr23848-1.c, the x86_64 -O2 output before this patch is:
foo2:
        pushq   %rbp
        movslq  %edi,%rcx
        movl    %edi, %r8d
        addq    $30, %rcx
        leal    -1(%r8), %eax
        movq    %rsp, %rbp
        andq    $-16, %rcx
        movq    %rbx, -32(%rbp)
        movq    %r12, -24(%rbp)
        cltq
        movq    %r13, -16(%rbp)
        movq    %r14, -8(%rbp)
        subq    $32, %rsp
        movq    %rsp, %r14
        subq    %rcx, %rsp
        movq    %rsp, %r13
        leaq    15(%rsp), %rdi
        subq    %rcx, %rsp
        movq    %rsp, %r12
        leaq    15(%rsp), %rsi
        subq    %rcx, %rsp
        movq    %rsp, %rbx
        leaq    15(%rsp), %rdx
        subq    %rcx, %rsp
        leaq    15(%rsp), %rcx
        andq    $-16, %rdi
        andq    $-16, %rsi
        movb    $0, (%rdi,%rax)
        andq    $-16, %rdx
        andq    $-16, %rcx
        call    bar2
        movq    %rbx, %rsp
        movq    %r12, %rsp
        movq    %r13, %rsp
        movq    %r14, %rsp
        movq    -32(%rbp), %rbx
        movq    -24(%rbp), %r12
        movq    -16(%rbp), %r13
        movq    -8(%rbp), %r14
        leave
        ret
while with the patch just:
foo2:
        movslq  %edi,%rcx
        pushq   %rbp
        movl    %edi, %r8d
        addq    $30, %rcx
        leal    -1(%r8), %eax
        andq    $-16, %rcx
        movq    %rsp, %rbp
        subq    %rcx, %rsp
        cltq
        leaq    15(%rsp), %rdi
        subq    %rcx, %rsp
        leaq    15(%rsp), %rsi
        subq    %rcx, %rsp
        leaq    15(%rsp), %rdx
        subq    %rcx, %rsp
        andq    $-16, %rdi
        leaq    15(%rsp), %rcx
        movb    $0, (%rdi,%rax)
        andq    $-16, %rsi
        andq    $-16, %rdx
        andq    $-16, %rcx
        call    bar2
        leave
        ret

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2007-11-05  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/23848
	* tree-ssa-ccp.c (optimize_stack_restore): New function.
	(execute_fold_all_builtins): Call optimize_stack_restore for
	BUILT_IN_STACK_RESTORE.

	* gcc.dg/tree-ssa/pr23848-1.c: New test.
	* gcc.dg/tree-ssa/pr23848-2.c: New test.
	* gcc.dg/tree-ssa/pr23848-3.c: New test.
	* gcc.dg/tree-ssa/pr23848-4.c: New test.

--- gcc/tree-ssa-ccp.c.jj	2007-09-04 23:09:30.000000000 +0200
+++ gcc/tree-ssa-ccp.c	2007-11-04 22:51:10.000000000 +0100
@@ -2598,6 +2598,76 @@ fold_stmt_inplace (tree stmt)
   return changed;
 }
 \f
+/* Try to optimize out __builtin_stack_restore.  Optimize it out
+   if there is another __builtin_stack_restore in the same basic
+   block and no calls or ASM_EXPRs are in between, or if this block's
+   only outgoing edge is to EXIT_BLOCK and there are no calls or
+   ASM_EXPRs after this __builtin_stack_restore.  */
+
+static tree
+optimize_stack_restore (basic_block bb, tree call, block_stmt_iterator i)
+{
+  tree stack_save, stmt, callee;
+
+  if (TREE_CODE (call) != CALL_EXPR
+      || call_expr_nargs (call) != 1
+      || TREE_CODE (CALL_EXPR_ARG (call, 0)) != SSA_NAME
+      || !POINTER_TYPE_P (TREE_TYPE (CALL_EXPR_ARG (call, 0))))
+    return NULL_TREE;
+
+  for (bsi_next (&i); !bsi_end_p (i); bsi_next (&i))
+    {
+      tree call;
+
+      stmt = bsi_stmt (i);
+      if (TREE_CODE (stmt) == ASM_EXPR)
+	return NULL_TREE;
+      call = get_call_expr_in (stmt);
+      if (call == NULL)
+	continue;
+
+      callee = get_callee_fndecl (call);
+      if (!callee || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+	return NULL_TREE;
+
+      if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE)
+	break;
+    }
+
+  if (bsi_end_p (i)
+      && (! single_succ_p (bb)
+	  || single_succ_edge (bb)->dest != EXIT_BLOCK_PTR))
+    return NULL_TREE;
+
+  stack_save = SSA_NAME_DEF_STMT (CALL_EXPR_ARG (call, 0));
+  if (TREE_CODE (stack_save) != GIMPLE_MODIFY_STMT
+      || GIMPLE_STMT_OPERAND (stack_save, 0) != CALL_EXPR_ARG (call, 0)
+      || TREE_CODE (GIMPLE_STMT_OPERAND (stack_save, 1)) != CALL_EXPR
+      || tree_could_throw_p (stack_save)
+      || !has_single_use (CALL_EXPR_ARG (call, 0)))
+    return NULL_TREE;
+
+  callee = get_callee_fndecl (GIMPLE_STMT_OPERAND (stack_save, 1));
+  if (!callee
+      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+      || DECL_FUNCTION_CODE (callee) != BUILT_IN_STACK_SAVE
+      || call_expr_nargs (GIMPLE_STMT_OPERAND (stack_save, 1)) != 0)
+    return NULL_TREE;
+
+  stmt = stack_save;
+  push_stmt_changes (&stmt);
+  if (!set_rhs (&stmt,
+		build_int_cst (TREE_TYPE (CALL_EXPR_ARG (call, 0)), 0)))
+    {
+      discard_stmt_changes (&stmt);
+      return NULL_TREE;
+    }
+  gcc_assert (stmt == stack_save);
+  pop_stmt_changes (&stmt);
+
+  return integer_zero_node;
+}
+\f
 /* Convert EXPR into a GIMPLE value suitable for substitution on the
    RHS of an assignment.  Insert the necessary statements before
    iterator *SI_P. 
@@ -2682,6 +2752,12 @@ execute_fold_all_builtins (void)
 		result = integer_zero_node;
 		break;
 
+	      case BUILT_IN_STACK_RESTORE:
+		result = optimize_stack_restore (bb, *stmtp, i);
+		if (result)
+		  break;
+		/* FALLTHRU */
+
 	      default:
 		bsi_next (&i);
 		continue;
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c	2007-11-04 23:03:30.000000000 +0100
@@ -0,0 +1,32 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar1 (char *, int);
+void foo1 (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  bar1 (temp, size);
+}
+
+void bar2 (char *, char *, char *, char *, int);
+void foo2 (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar2 (temp, temp2, temp3, temp4, size);
+      }
+    }
+  }
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c	2007-11-04 23:04:06.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar (temp, temp2, temp3, temp4, size);
+      }
+    }
+    bar (temp, temp2, (char *) 0, (char *) 0, size);
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c	2007-11-04 23:03:51.000000000 +0100
@@ -0,0 +1,28 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (int, char *, char *, char *, char *, int);
+void foo (int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      char temp[size];
+      temp[size-1] = '\0';
+      {
+	char temp2[size];
+	{
+	  char temp3[size];
+	  {
+	    char temp4[size];
+	    bar (i, temp, temp2, temp3, temp4, size);
+	  }
+	}
+      }
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c	2007-11-04 23:04:48.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar (temp, temp2, temp3, temp4, size);
+      }
+    }
+    __asm __volatile ("" : : "r" (&temp[0]), "r" (&temp2[0]) : "memory");
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s  (PR middle-end/23848)
  2007-11-05  0:12 [PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848) Jakub Jelinek
@ 2007-11-15 19:49 ` Diego Novillo
  0 siblings, 0 replies; 2+ messages in thread
From: Diego Novillo @ 2007-11-15 19:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Jakub Jelinek wrote:

> 2007-11-05  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/23848
> 	* tree-ssa-ccp.c (optimize_stack_restore): New function.
> 	(execute_fold_all_builtins): Call optimize_stack_restore for
> 	BUILT_IN_STACK_RESTORE.
> 
> 	* gcc.dg/tree-ssa/pr23848-1.c: New test.
> 	* gcc.dg/tree-ssa/pr23848-2.c: New test.
> 	* gcc.dg/tree-ssa/pr23848-3.c: New test.
> 	* gcc.dg/tree-ssa/pr23848-4.c: New test.

OK.


Diego.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-11-15 18:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-05  0:12 [PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848) Jakub Jelinek
2007-11-15 19:49 ` Diego Novillo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).