public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [AArch64][1/4] Enable tree-stdarg pass for AArch64 by defining counter fields
       [not found] <572CA45D.6060706@foss.arm.com>
@ 2016-05-06 15:00 ` Jiong Wang
  2016-05-26 14:16   ` James Greenhalgh
       [not found] ` <572CA702.9020808@foss.arm.com>
  1 sibling, 1 reply; 7+ messages in thread
From: Jiong Wang @ 2016-05-06 15:00 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1041 bytes --]

This patch initializes va_list_gpr_counter_field and
va_list_fpr_counter_field properly for the AArch64 backend so that the
tree-stdarg pass will be enabled.

The "required register" analysis is largely target-independent, but the
user might operate on the internal offset field of the va_list structure
directly, for example:

   d = __builtin_va_arg (ap, int);
   ap.__gr_offs += 0x20;
   e = __builtin_va_arg (ap, int);

in which case tree-stdarg requires us to tell it which field inside the
va_list structure is the backend's offset field, so that it can still
figure out that we actually need to save 6 general registers.

OK for upstream?

2016-05-06  Jiong Wang  <jiong.wang@arm.com>
gcc/
   * config/aarch64/aarch64.c (aarch64_build_builtin_va_list): Initialize
   va_list_gpr_counter_field and va_list_fpr_counter_field.

gcc/testsuite/
   * gcc.dg/tree-ssa/stdarg-2.c: Enable all testcases for AArch64.
   * gcc.dg/tree-ssa/stdarg-3.c: Likewise.
   * gcc.dg/tree-ssa/stdarg-4.c: Likewise.
   * gcc.dg/tree-ssa/stdarg-5.c: Likewise.
   * gcc.dg/tree-ssa/stdarg-6.c: Likewise.


[-- Attachment #2: 0001-1.patch --]
[-- Type: text/x-patch, Size: 39385 bytes --]

From 93485b0163bbaddf7fdf472aac2d3a96823bd63a Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.wang@arm.com>
Date: Fri, 6 May 2016 14:36:12 +0100
Subject: [PATCH 1/4] 1

---
 gcc/config/aarch64/aarch64.c             |  7 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c | 15 +++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c | 11 +++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c |  4 ++++
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c |  7 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c |  1 +
 6 files changed, 45 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9995494..aff4a95 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9463,6 +9463,13 @@ aarch64_build_builtin_va_list (void)
 			FIELD_DECL, get_identifier ("__vr_offs"),
 			integer_type_node);
 
+  /* Tell the tree-stdarg pass which fields are our internal offset fields.
+     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
+     purposes, to identify whether the code is updating the va_list internal
+     offset fields in an irregular way.  */
+  va_list_gpr_counter_field = f_groff;
+  va_list_fpr_counter_field = f_vroff;
+
   DECL_ARTIFICIAL (f_stack) = 1;
   DECL_ARTIFICIAL (f_grtop) = 1;
   DECL_ARTIFICIAL (f_vrtop) = 1;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
index c73294a..0224997 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
@@ -25,6 +25,7 @@ f1 (int i, ...)
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -45,6 +46,7 @@ f2 (int i, ...)
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -60,6 +62,7 @@ f3 (int i, ...)
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
@@ -78,6 +81,7 @@ f4 (int i, ...)
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -96,6 +100,7 @@ f5 (int i, ...)
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -116,6 +121,7 @@ f6 (int i, ...)
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -133,6 +139,7 @@ f7 (int i, ...)
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -152,6 +159,7 @@ f8 (int i, ...)
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -169,6 +177,7 @@ f9 (int i, ...)
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -188,6 +197,7 @@ f10 (int i, ...)
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -208,6 +218,7 @@ f11 (int i, ...)
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -228,6 +239,7 @@ f12 (int i, ...)
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -248,6 +260,7 @@ f13 (int i, ...)
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -268,6 +281,7 @@ f14 (int i, ...)
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -291,6 +305,7 @@ f15 (int i, ...)
 /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 /* We may be able to improve upon this after fixing PR66010/PR66013.  */
 /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
index 9367707..d044654 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
@@ -24,6 +24,7 @@ f1 (int i, ...)
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -39,6 +40,7 @@ f2 (int i, ...)
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -57,6 +59,7 @@ f3 (int i, ...)
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -73,6 +76,7 @@ f4 (int i, ...)
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -89,6 +93,7 @@ f5 (int i, ...)
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -107,6 +112,7 @@ f6 (int i, ...)
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -123,6 +129,7 @@ f7 (int i, ...)
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -139,6 +146,7 @@ f8 (int i, ...)
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -155,6 +163,7 @@ f10 (int i, ...)
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -171,6 +180,7 @@ f11 (int i, ...)
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -187,6 +197,7 @@ f12 (int i, ...)
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
index 94e35a8..1a637d6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
@@ -27,6 +27,7 @@ f1 (int i, ...)
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -44,6 +45,7 @@ f2 (int i, ...)
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -67,6 +69,7 @@ f3 (int i, ...)
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -88,6 +91,7 @@ f4 (int i, ...)
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
index b816d93..c8ad4fe 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
@@ -25,6 +25,7 @@ f1 (int i, ...)
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 void
 f2 (int i, ...)
@@ -38,6 +39,7 @@ f2 (int i, ...)
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 /* Here va_arg can be executed at most as many times as va_start.  */
 void
@@ -56,6 +58,7 @@ f3 (int i, ...)
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 void
 f4 (int i, ...)
@@ -74,6 +77,7 @@ f4 (int i, ...)
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 void
 f5 (int i, ...)
@@ -88,6 +92,7 @@ f5 (int i, ...)
 /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 void
 f6 (int i, ...)
@@ -102,6 +107,7 @@ f6 (int i, ...)
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
 
 void
 f7 (int i, ...)
@@ -116,3 +122,4 @@ f7 (int i, ...)
 /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c
index c21dc17..be7bc0d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c
@@ -30,6 +30,7 @@ bar (int x, char const *y, ...)
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
 /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [AArch64][3/4] Don't generate redundant checks when there is no composite arg
       [not found]   ` <572CA787.8050107@foss.arm.com>
@ 2016-05-06 15:00     ` Jiong Wang
  2016-05-31 19:20       ` James Greenhalgh
       [not found]     ` <572CA834.2020800@foss.arm.com>
  1 sibling, 1 reply; 7+ messages in thread
From: Jiong Wang @ 2016-05-06 15:00 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

AArch64 va_arg gimplify hook is generating redundant instructions.

The current va_arg fetch logic is:

1  if (va_arg offset shows the arg is saved at reg_save_area)
2     if ((va_arg_offset + va_arg_type_size) <= 0)
3        fetch va_arg from reg_save_area.
4     else
5        fetch va_arg from incoming_stack.
6  else
7    fetch va_arg from incoming_stack.

The code for "fetch va_arg from incoming_stack" is therefore generated
*twice*, which is redundant.

The further "if" check at line 2 exists because we do not support
splitting a composite argument: it is passed either entirely in
reg_save_area or entirely in the incoming_stack area.

Thus, we need the further check at line 2 to decide whether the space
remaining in reg_save_area is large enough; if not, the argument is passed
from incoming_stack.

However, this more complex logic is only necessary for composite types,
not for other types.

This patch therefore *generates the redundant code only for composite
types*, while for basic types like "int", "float" etc, we can simplify it
to:

   if (va_arg_offset < 0)
     fetch va_arg from reg_save_area.
   else
     fetch va_arg from incoming_stack.

And this simplified version actually is the most usual case.

For example, this patch reduces the number of instructions from about 130
to 100 for the included testcase.

ok for trunk?

2016-05-06  Jiong Wang  <jiong.wang@arm.com>

gcc/
   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
   duplicated check code.

gcc/testsuite/
   * gcc.target/aarch64/va_arg_4.c: New testcase.

[-- Attachment #2: 0003-3.patch --]
[-- Type: text/x-patch, Size: 6589 bytes --]

From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.wang@arm.com>
Date: Fri, 6 May 2016 14:37:37 +0100
Subject: [PATCH 3/4] 3

---
 gcc/config/aarch64/aarch64.c                | 94 ++++++++++++++++++++---------
 gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++++++
 2 files changed, 87 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b1a0287..06904d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   bool indirect_p;
   bool is_ha;		/* is HFA or HVA.  */
   bool dw_align;	/* double-word align.  */
+  bool composite_type_p;
   machine_mode ag_mode = VOIDmode;
   int nregs;
   machine_mode mode;
@@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, f_top, f_off, off, arg, roundup, on_stack;
   HOST_WIDE_INT size, rsize, adjust, align;
-  tree t, u, cond1, cond2;
+  tree t, t1, u, cond1, cond2;
 
   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
   if (indirect_p)
     type = build_pointer_type (type);
 
   mode = TYPE_MODE (type);
+  composite_type_p = aarch64_composite_type_p (type, mode);
 
   f_stack = TYPE_FIELDS (va_list_type_node);
   f_grtop = DECL_CHAIN (f_stack);
@@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	      build_int_cst (TREE_TYPE (off), 0));
   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
 
-  if (dw_align)
+  if (composite_type_p)
     {
-      /* Emit: offs = (offs + 15) & -16.  */
-      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-		  build_int_cst (TREE_TYPE (off), 15));
-      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
-		  build_int_cst (TREE_TYPE (off), -16));
-      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
-    }
-  else
-    roundup = NULL;
+      if (dw_align)
+	{
+	  /* Emit: offs = (offs + 15) & -16.  */
+	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		      build_int_cst (TREE_TYPE (off), 15));
+	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
+		      build_int_cst (TREE_TYPE (off), -16));
+	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
+	}
+      else
+	roundup = NULL;
 
-  /* Update ap.__[g|v]r_offs  */
-  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-	      build_int_cst (TREE_TYPE (off), rsize));
-  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
+      /* Update ap.__[g|v]r_offs  */
+      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		  build_int_cst (TREE_TYPE (off), rsize));
+      t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
 
-  /* String up.  */
-  if (roundup)
-    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
+      /* String up.  */
+      if (roundup)
+	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
 
-  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
-  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
-	      build_int_cst (TREE_TYPE (f_off), 0));
-  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
+      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
+      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
+		  build_int_cst (TREE_TYPE (f_off), 0));
+      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
 
-  /* String up: make sure the assignment happens before the use.  */
-  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
-  COND_EXPR_ELSE (cond1) = t;
+      /* String up: make sure the assignment happens before the use.  */
+      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
+      COND_EXPR_ELSE (cond1) = t;
+    }
 
   /* Prepare the trees handling the argument that is passed on the stack;
      the top level node will store in ON_STACK.  */
@@ -9739,13 +9744,34 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
   }
 
-  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
-  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+  if (composite_type_p)
+    {
+      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
+      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+
+      t = off;
+    }
+  else
+    {
+      COND_EXPR_THEN (cond1) = on_stack;
+      if (dw_align)
+	{
+	  /* Emit: offs = (offs + 15) & -16.  */
+	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		      build_int_cst (TREE_TYPE (off), 15));
+	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
+		      build_int_cst (TREE_TYPE (off), -16));
+	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
+	}
+      else
+	roundup = off;
+
+      t = roundup;
+    }
 
   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
-  t = off;
   if (adjust)
-    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
+    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), composite_type_p ? off : t,
 		build_int_cst (TREE_TYPE (off), adjust));
 
   t = fold_convert (sizetype, t);
@@ -9827,7 +9853,15 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
     }
 
-  COND_EXPR_ELSE (cond2) = t;
+  if (composite_type_p)
+    COND_EXPR_ELSE (cond2) = t;
+  else
+    {
+      t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
+		   build_int_cst (TREE_TYPE (off), rsize));
+      t1 = build2 (MODIFY_EXPR, TREE_TYPE (f_off), f_off, t1);
+      COND_EXPR_ELSE (cond1) = build2 (COMPOUND_EXPR, TREE_TYPE (t), t1, t);
+    }
   addr = fold_convert (build_pointer_type (type), cond1);
   addr = build_va_arg_indirect_ref (addr);
 
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_4.c b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
new file mode 100644
index 0000000..35232c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
+
+int d2i (double a);
+
+int
+foo(char *fmt, ...)
+{
+  int d, e;
+  double f, g;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  f = __builtin_va_arg (ap, double);
+  g = __builtin_va_arg (ap, double);
+  d += d2i (f);
+  d += d2i (g);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-tree-dump-times "ap.__stack =" 3 "lower_vaarg"} } */
+  return d;
+}
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [AArch64][2/4] PR63596, honor tree-stdarg analysis result to improve VAARG codegen
       [not found] ` <572CA702.9020808@foss.arm.com>
@ 2016-05-06 15:00   ` Jiong Wang
  2016-05-26 14:48     ` James Greenhalgh
       [not found]   ` <572CA787.8050107@foss.arm.com>
  1 sibling, 1 reply; 7+ messages in thread
From: Jiong Wang @ 2016-05-06 15:00 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1277 bytes --]

This patch fixes PR63596.

There is no need to push/pop all argument registers. We only need to
push and pop the registers that are actually used. This usage information
is calculated by a dedicated vaarg optimization tree pass, "tree-stdarg",
and the backend should honor its analysis result.

For a simple testcase where the vaarg is declared but never actually used:

int
f (int a, ...)
{
   return a;
}

before this patch, we are generating:

f:
         sub     sp, sp, #192
         stp     x1, x2, [sp, 136]
         stp     x3, x4, [sp, 152]
	stp     x5, x6, [sp, 168]
         str     x7, [sp, 184]
         str     q0, [sp]
         str     q1, [sp, 16]
         str     q2, [sp, 32]
         str     q3, [sp, 48]
	str     q4, [sp, 64]
         str     q5, [sp, 80]
         str     q6, [sp, 96]
         str     q7, [sp, 112]
	add     sp, sp, 192
	ret

after this patch, it's optimized into:

f:
   	ret

OK for trunk?

2016-05-06  Jiong Wang  <jiong.wang@arm.com>
gcc/
   PR63596
   * config/aarch64/aarch64.c (aarch64_expand_builtin_va_start): Honor
   tree-stdarg analysis results.
   (aarch64_setup_incoming_varargs): Likewise.

gcc/testsuite/
   PR63596
   * gcc.target/aarch64/va_arg_1.c: New testcase.
   * gcc.target/aarch64/va_arg_2.c: Likewise.
   * gcc.target/aarch64/va_arg_3.c: Likewise.


[-- Attachment #2: 0002-2.patch --]
[-- Type: text/x-patch, Size: 5888 bytes --]

From dfcfe78511047501ed4b2f323b190c1290314104 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.wang@arm.com>
Date: Fri, 6 May 2016 14:36:42 +0100
Subject: [PATCH 2/4] 2

---
 gcc/config/aarch64/aarch64.c                | 35 ++++++++++++++++++-----------
 gcc/testsuite/gcc.target/aarch64/va_arg_1.c | 11 +++++++++
 gcc/testsuite/gcc.target/aarch64/va_arg_2.c | 18 +++++++++++++++
 gcc/testsuite/gcc.target/aarch64/va_arg_3.c | 26 +++++++++++++++++++++
 4 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_3.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aff4a95..b1a0287 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9502,15 +9502,17 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, grtop, vrtop, groff, vroff;
   tree t;
-  int gr_save_area_size;
-  int vr_save_area_size;
+  int gr_save_area_size = cfun->va_list_gpr_size;
+  int vr_save_area_size = cfun->va_list_fpr_size;
   int vr_offset;
 
   cum = &crtl->args.info;
-  gr_save_area_size
-    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
-  vr_save_area_size
-    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
+  if (cfun->va_list_gpr_size)
+    gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
+			     cfun->va_list_gpr_size);
+  if (cfun->va_list_fpr_size)
+    vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
+			     * UNITS_PER_VREG, cfun->va_list_fpr_size);
 
   if (!TARGET_FLOAT)
     {
@@ -9844,7 +9846,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   CUMULATIVE_ARGS local_cum;
-  int gr_saved, vr_saved;
+  int gr_saved = cfun->va_list_gpr_size;
+  int vr_saved = cfun->va_list_fpr_size;
 
   /* The caller has advanced CUM up to, but not beyond, the last named
      argument.  Advance a local copy of CUM past the last "real" named
@@ -9852,9 +9855,14 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
   local_cum = *cum;
   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
 
-  /* Found out how many registers we need to save.  */
-  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
-  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
+  /* Find out how many registers we need to save.
+     Honor tree-stdarg analysis results.  */
+  if (cfun->va_list_gpr_size)
+    gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
+		    cfun->va_list_gpr_size / UNITS_PER_WORD);
+  if (cfun->va_list_fpr_size)
+    vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
+		    cfun->va_list_fpr_size / UNITS_PER_VREG);
 
   if (!TARGET_FLOAT)
     {
@@ -9882,7 +9890,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 	  /* We can't use move_block_from_reg, because it will use
 	     the wrong mode, storing D regs only.  */
 	  machine_mode mode = TImode;
-	  int off, i;
+	  int off, i, vr_start;
 
 	  /* Set OFF to the offset from virtual_incoming_args_rtx of
 	     the first vector register.  The VR save area lies below
@@ -9891,14 +9899,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 			   STACK_BOUNDARY / BITS_PER_UNIT);
 	  off -= vr_saved * UNITS_PER_VREG;
 
-	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
+	  vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
+	  for (i = 0; i < vr_saved; ++i)
 	    {
 	      rtx ptr, mem;
 
 	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
 	      mem = gen_frame_mem (mode, ptr);
 	      set_mem_alias_set (mem, get_varargs_alias_set ());
-	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
+	      aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
 	      off += UNITS_PER_VREG;
 	    }
 	}
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_1.c b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
new file mode 100644
index 0000000..e8e3cda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int
+f (int a, ...)
+{
+  /* { dg-final { scan-assembler-not "str" } } */
+  return a;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_2.c b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
new file mode 100644
index 0000000..f5c46cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int
+foo (char *fmt, ...)
+{
+  int d;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-assembler-not "x7" } } */
+  return d;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_3.c b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c
new file mode 100644
index 0000000..7f7601a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int d2i (double a);
+
+int
+foo (char *fmt, ...)
+{
+  int d, e;
+  double f, g;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  f = __builtin_va_arg (ap, double);
+  g = __builtin_va_arg (ap, double);
+  d += d2i (f);
+  d += d2i (g);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-assembler-not "x7" } } */
+  /* { dg-final { scan-assembler-not "q7" } } */
+  return d;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [AArch64][4/4] Simplify cfg during vaarg gimplification
       [not found]     ` <572CA834.2020800@foss.arm.com>
@ 2016-05-06 15:01       ` Jiong Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Jiong Wang @ 2016-05-06 15:01 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1240 bytes --]

Based on patch [3/4], we can further optimize the vaarg gimplification
logic, this time not for redundant checks, but for redundant basic
blocks. Thus we can simplify the control flow graph and eventually generate
fewer branch instructions.

The current gimplification logic requires three basic blocks:

  // check whether we are still in the register save area
  if (vaarg_offset < 0)
    {
      // we are still in the register area, but a composite type is
      // never passed partly in registers and partly on the stack, so
      // make sure the remaining register area can hold the whole
      // argument. If it cannot, skip it and fetch from the stack.
      if (vaarg_offset + arg_size > 0)
        fetch from stack
      else
        fetch from register
    }
  else
    fetch from stack

while we can further optimize the logic into the following to reduce the
number of basic blocks to two:

  if (vaarg_offset >= 0 || (vaarg_offset + arg_size > 0))
    fetch from stack
  else
    fetch from register

OK for trunk?

2016-05-06 Alan Lawrence  <alan.lawrence@arm.com>
            Jiong Wang  <jiong.wang@arm.com>

gcc/
   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Use
   TRUTH_ORIF_EXPR.

gcc/testsuite/
   * gcc.target/aarch64/va_arg_5.c: New test.


[-- Attachment #2: 0004-4.patch --]
[-- Type: text/x-patch, Size: 4995 bytes --]

From d742eaa3469f28e4207034f3fe4ebd4d54b3dd42 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.wang@arm.com>
Date: Fri, 6 May 2016 14:38:00 +0100
Subject: [PATCH 4/4] 4

---
 gcc/config/aarch64/aarch64.c                | 53 +++++++++++++++++++++--------
 gcc/testsuite/gcc.target/aarch64/va_arg_5.c | 20 +++++++++++
 2 files changed, 58 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_5.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 06904d5..bd4a9fe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9577,7 +9577,32 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 }
 
-/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
+   The VA_ARG gimplify logic was:
+
+     // check whether we are still in the register save area
+     if (vaarg_offset < 0)
+       {
+	 // we are still in the register area, but a composite type is
+	 // never passed partly in registers and partly on the stack, so
+	 // make sure the remaining register area can hold the whole
+	 // argument.  If it cannot, skip it and fetch from the stack.
+	 if (vaarg_offset + arg_size > 0)
+	   fetch from stack
+	 else
+	   fetch from register
+       }
+     else
+       fetch from stack
+
+   We can simplify this into the following to use fewer basic blocks:
+
+     if (vaarg_offset >= 0 || (vaarg_offset + arg_size > 0))
+       fetch from stack
+     else
+       fetch from register
+
+   The tree node TRUTH_ORIF_EXPR can express the condition we want.  */
 
 static tree
 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
@@ -9595,7 +9620,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, f_top, f_off, off, arg, roundup, on_stack;
   HOST_WIDE_INT size, rsize, adjust, align;
-  tree t, t1, u, cond1, cond2;
+  tree t, t1, u, cond1, pred1, pred2;
 
   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
   if (indirect_p)
@@ -9669,9 +9694,8 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   off = get_initialized_tmp_var (f_off, pre_p, NULL);
 
   /* Emit code to branch if off >= 0.  */
-  t = build2 (GE_EXPR, boolean_type_node, off,
-	      build_int_cst (TREE_TYPE (off), 0));
-  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
+  pred1 = build2 (GE_EXPR, boolean_type_node, off,
+		  build_int_cst (TREE_TYPE (off), 0));
 
   if (composite_type_p)
     {
@@ -9696,16 +9720,16 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
       if (roundup)
 	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
 
-      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
-      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
-		  build_int_cst (TREE_TYPE (f_off), 0));
-      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
+      /* [pred2] if (ap.__[g|v]r_offs > 0)  */
+      pred2 = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
+		      build_int_cst (TREE_TYPE (f_off), 0));
+      pred2 = build2 (COMPOUND_EXPR, TREE_TYPE (pred2), t, pred2);
 
-      /* String up: make sure the assignment happens before the use.  */
-      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
-      COND_EXPR_ELSE (cond1) = t;
+      pred1 = build2 (TRUTH_ORIF_EXPR, boolean_type_node, pred1, pred2);
     }
 
+  cond1 = build3 (COND_EXPR, ptr_type_node, pred1, NULL_TREE, NULL_TREE);
+
   /* Prepare the trees handling the argument that is passed on the stack;
      the top level node will store in ON_STACK.  */
   arg = get_initialized_tmp_var (stack, pre_p, NULL);
@@ -9746,8 +9770,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 
   if (composite_type_p)
     {
-      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
-      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+      COND_EXPR_THEN (cond1) = on_stack;
 
       t = off;
     }
@@ -9854,7 +9877,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
     }
 
   if (composite_type_p)
-    COND_EXPR_ELSE (cond2) = t;
+    COND_EXPR_ELSE (cond1) = t;
   else
     {
       t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_5.c b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c
new file mode 100644
index 0000000..0d6daef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
+
+typedef struct A {
+    float a;
+} T;
+
+T
+foo (char *fmt, ...)
+{
+  T a;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  a = __builtin_va_arg (ap, T);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-tree-dump-times "ap.__stack =" 1 "lower_vaarg"} } */
+  return a;
+}
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64][1/4] Enable tree-stdarg pass for AArch64 by defining counter fields
  2016-05-06 15:00 ` [AArch64][1/4] Enable tree-stdarg pass for AArch64 by defining counter fields Jiong Wang
@ 2016-05-26 14:16   ` James Greenhalgh
  0 siblings, 0 replies; 7+ messages in thread
From: James Greenhalgh @ 2016-05-26 14:16 UTC (permalink / raw)
  To: Jiong Wang; +Cc: GCC Patches, nd

On Fri, May 06, 2016 at 04:00:13PM +0100, Jiong Wang wrote:
> This patch initializes va_list_gpr_counter_field and
> va_list_fpr_counter_field properly for the AArch64 backend so that the
> tree-stdarg pass will be enabled.
> 
> The "required register" analysis is largely target independent, but the
> user might operate on the inner offset field in vaarg structure directly,
> for example:
> 
>   d = __builtin_va_arg (ap, int);
>   ap.__gr_offs += 0x20;
>   e = __builtin_va_arg (ap, int);
> 
> in which case tree-stdarg requires us to tell it which field inside the
> vaarg structure is the backend offset field, so that it can still figure
> out that we actually need to save 6 general registers.
> 
> ok for upstream?

I have a small comment issue for you to fix, otherwise this is OK.

> 2016-05-06  Jiong Wang  <jiong.wang@arm.com>
> gcc/
>   * config/aarch64/aarch64.c (aarch64_build_builtin_va_list): Initialize
>   va_list_gpr_counter_field and va_list_fpr_counter_field.
> 
> gcc/testsuite/
>   * gcc.dg/tree-ssa/stdarg-2.c: Enable all testcases for AArch64.
>   * gcc.dg/tree-ssa/stdarg-3.c: Likewise.
>   * gcc.dg/tree-ssa/stdarg-4.c: Likewise.
>   * gcc.dg/tree-ssa/stdarg-5.c: Likewise.
>   * gcc.dg/tree-ssa/stdarg-6.c: Likewise.
> 
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9995494..aff4a95 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9463,6 +9463,13 @@ aarch64_build_builtin_va_list (void)
>  			FIELD_DECL, get_identifier ("__vr_offs"),
>  			integer_type_node);
>  
> +  /* Tell tree-stdarg pass what's our internal offset fields.

This doesn't read quite right, how about something like:

  "Tell tree-stdarg pass about our internal offset fields."

Thanks,
James

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64][2/4] PR63596, honor tree-stdarg analysis result to improve VAARG codegen
  2016-05-06 15:00   ` [AArch64][2/4] PR63596, honor tree-stdarg analysis result to improve VAARG codegen Jiong Wang
@ 2016-05-26 14:48     ` James Greenhalgh
  0 siblings, 0 replies; 7+ messages in thread
From: James Greenhalgh @ 2016-05-26 14:48 UTC (permalink / raw)
  To: Jiong Wang; +Cc: GCC Patches, nd

On Fri, May 06, 2016 at 04:00:28PM +0100, Jiong Wang wrote:
> This patch fixes PR63596.
> 
> There is no need to push/pop all argument registers. We only need to
> push and pop the registers that are actually used. This usage info is
> calculated by a dedicated vaarg optimization tree pass, "tree-stdarg", and
> the backend should honor its analysis result.
> 
> For a simple testcase where a vaarg is declared but not actually used:
> 
> int
> f (int a, ...)
> {
>   return a;
> }
> 
> before this patch, we are generating:
> 
> f:
>         sub     sp, sp, #192
>         stp     x1, x2, [sp, 136]
>         stp     x3, x4, [sp, 152]
> 	stp     x5, x6, [sp, 168]
>         str     x7, [sp, 184]
>         str     q0, [sp]
>         str     q1, [sp, 16]
>         str     q2, [sp, 32]
>         str     q3, [sp, 48]
> 	str     q4, [sp, 64]
>         str     q5, [sp, 80]
>         str     q6, [sp, 96]
>         str     q7, [sp, 112]
> 	add     sp, sp, 192
> 	ret
> 
> after this patch, it's optimized into:
> 
> f:
>   	ret

Can't argue with that! Nice!

> OK for trunk?

OK.

Thanks,
James


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64][3/4] Don't generate redundant checks when there is no composite arg
  2016-05-06 15:00     ` [AArch64][3/4] Don't generate redundant checks when there is no composite arg Jiong Wang
@ 2016-05-31 19:20       ` James Greenhalgh
  0 siblings, 0 replies; 7+ messages in thread
From: James Greenhalgh @ 2016-05-31 19:20 UTC (permalink / raw)
  To: Jiong Wang; +Cc: GCC Patches, nd, marcus.shawcroft, richard.earnshaw

On Fri, May 06, 2016 at 04:00:40PM +0100, Jiong Wang wrote:
> 2016-05-06  Jiong Wang  <jiong.wang@arm.com>
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
>   duplicated check code.
> 
> gcc/testsuite/
>   * gcc.target/aarch64/va_arg_4.c: New testcase.

I wonder whether this is safe for the int128_t/uint128_t data types and
overaligned data types? My concern is that something with an alignment of
16 bytes may still need the check to skip the final hard register slot
on the stack.

I'm not sure here, so I'll need some more time to think this through, or
for Richard or Marcus to review this and help me understand why there
is nothing to worry about.

Thanks,
James

> From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
> From: "Jiong.Wang" <jiong.wang@arm.com>
> Date: Fri, 6 May 2016 14:37:37 +0100
> Subject: [PATCH 3/4] 3
> 
> ---
>  gcc/config/aarch64/aarch64.c                | 94 ++++++++++++++++++++---------
>  gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++++++
>  2 files changed, 87 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b1a0287..06904d5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>    bool indirect_p;
>    bool is_ha;		/* is HFA or HVA.  */
>    bool dw_align;	/* double-word align.  */
> +  bool composite_type_p;
>    machine_mode ag_mode = VOIDmode;
>    int nregs;
>    machine_mode mode;
> @@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>    tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
>    tree stack, f_top, f_off, off, arg, roundup, on_stack;
>    HOST_WIDE_INT size, rsize, adjust, align;
> -  tree t, u, cond1, cond2;
> +  tree t, t1, u, cond1, cond2;
>  
>    indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
>    if (indirect_p)
>      type = build_pointer_type (type);
>  
>    mode = TYPE_MODE (type);
> +  composite_type_p = aarch64_composite_type_p (type, mode);
>  
>    f_stack = TYPE_FIELDS (va_list_type_node);
>    f_grtop = DECL_CHAIN (f_stack);
> @@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>  	      build_int_cst (TREE_TYPE (off), 0));
>    cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
>  
> -  if (dw_align)
> +  if (composite_type_p)
>      {
> -      /* Emit: offs = (offs + 15) & -16.  */
> -      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -		  build_int_cst (TREE_TYPE (off), 15));
> -      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> -		  build_int_cst (TREE_TYPE (off), -16));
> -      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> -    }
> -  else
> -    roundup = NULL;
> +      if (dw_align)
> +	{
> +	  /* Emit: offs = (offs + 15) & -16.  */
> +	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		      build_int_cst (TREE_TYPE (off), 15));
> +	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> +		      build_int_cst (TREE_TYPE (off), -16));
> +	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> +	}
> +      else
> +	roundup = NULL;
>  
> -  /* Update ap.__[g|v]r_offs  */
> -  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -	      build_int_cst (TREE_TYPE (off), rsize));
> -  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
> +      /* Update ap.__[g|v]r_offs  */
> +      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		  build_int_cst (TREE_TYPE (off), rsize));
> +      t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
>  
> -  /* String up.  */
> -  if (roundup)
> -    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
> +      /* String up.  */
> +      if (roundup)
> +	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
>  
> -  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> -  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> -	      build_int_cst (TREE_TYPE (f_off), 0));
> -  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
> +      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> +      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> +		  build_int_cst (TREE_TYPE (f_off), 0));
> +      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
>  
> -  /* String up: make sure the assignment happens before the use.  */
> -  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> -  COND_EXPR_ELSE (cond1) = t;
> +      /* String up: make sure the assignment happens before the use.  */
> +      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> +      COND_EXPR_ELSE (cond1) = t;
> +    }
>  
>    /* Prepare the trees handling the argument that is passed on the stack;
>       the top level node will store in ON_STACK.  */
> @@ -9739,13 +9744,34 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
>    }
>  
> -  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
> -  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
> +  if (composite_type_p)
> +    {
> +      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
> +      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
> +
> +      t = off;
> +    }
> +  else
> +    {
> +      COND_EXPR_THEN (cond1) = on_stack;
> +      if (dw_align)
> +	{
> +	  /* Emit: offs = (offs + 15) & -16.  */
> +	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		      build_int_cst (TREE_TYPE (off), 15));
> +	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> +		      build_int_cst (TREE_TYPE (off), -16));
> +	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> +	}
> +      else
> +	roundup = off;
> +
> +      t = roundup;
> +    }
>  
>    /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
> -  t = off;
>    if (adjust)
> -    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
> +    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), composite_type_p ? off : t,
>  		build_int_cst (TREE_TYPE (off), adjust));
>  
>    t = fold_convert (sizetype, t);
> @@ -9827,7 +9853,15 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>        t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
>      }
>  
> -  COND_EXPR_ELSE (cond2) = t;
> +  if (composite_type_p)
> +    COND_EXPR_ELSE (cond2) = t;
> +  else
> +    {
> +      t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
> +		   build_int_cst (TREE_TYPE (off), rsize));
> +      t1 = build2 (MODIFY_EXPR, TREE_TYPE (f_off), f_off, t1);
> +      COND_EXPR_ELSE (cond1) = build2 (COMPOUND_EXPR, TREE_TYPE (t), t1, t);
> +    }
>    addr = fold_convert (build_pointer_type (type), cond1);
>    addr = build_va_arg_indirect_ref (addr);
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_4.c b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> new file mode 100644
> index 0000000..35232c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
> +
> +int d2i (double a);
> +
> +int
> +foo(char *fmt, ...)
> +{
> +  int d, e;
> +  double f, g;
> +  __builtin_va_list ap;
> +
> +  __builtin_va_start (ap, fmt);
> +  d = __builtin_va_arg (ap, int);
> +  f = __builtin_va_arg (ap, double);
> +  g = __builtin_va_arg (ap, double);
> +  d += d2i (f);
> +  d += d2i (g);
> +  __builtin_va_end (ap);
> +
> +  /* { dg-final { scan-tree-dump-times "ap.__stack =" 3 "lower_vaarg"} } */
> +  return d;
> +}
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-05-31 17:07 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <572CA45D.6060706@foss.arm.com>
2016-05-06 15:00 ` [AArch64][1/4] Enable tree-stdarg pass for AArch64 by defining counter fields Jiong Wang
2016-05-26 14:16   ` James Greenhalgh
     [not found] ` <572CA702.9020808@foss.arm.com>
2016-05-06 15:00   ` [AArch64][2/4] PR63596, honor tree-stdarg analysis result to improve VAARG codegen Jiong Wang
2016-05-26 14:48     ` James Greenhalgh
     [not found]   ` <572CA787.8050107@foss.arm.com>
2016-05-06 15:00     ` [AArch64][3/4] Don't generate redundant checks when there is no composite arg Jiong Wang
2016-05-31 19:20       ` James Greenhalgh
     [not found]     ` <572CA834.2020800@foss.arm.com>
2016-05-06 15:01       ` [AArch64][4/4] Simplify cfg during vaarg gimplification Jiong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).