public inbox for gcc-patches@gcc.gnu.org
* [PATCH 10/12] [i386] Add ms2sysv pro/epilogue stubs to libgcc
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (5 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 02/12] [i386] Keep stack pointer valid after re-alignment Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-27  8:05 ` [PATCH 08/12] [i386] Modify ix86_compute_frame_layout for -mcall-ms2sysv-xlogues Daniel Santos
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add new header libgcc/config/i386/i386-asm.h to manage common cpp and
gas macros.  Add new stubs.  Stubs use the following naming convention:

  __<sav|res>ms64[f][x]_<n>

    <sav|res>   Save or restore
    ms64        Avoid possible name collisions with future stubs
		(specific to 64-bit msabi --> sysv scenario)
    [f]         Variant for hard frame pointer (and stack realignment)
    [x]         Tail-call variant (performs the return from the function)
    <n>         The number of registers to save.
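
For example, __savms64_12 is the save stub for the minimal set of 12
registers (RSI, RDI and XMM6-15), while __resms64fx_17 is the tail-call
restore variant for 17 registers when a hard frame pointer is in use.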

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 libgcc/config.host             |  2 +-
 libgcc/config/i386/i386-asm.h  | 82 ++++++++++++++++++++++++++++++++++++++++++
 libgcc/config/i386/resms64.S   | 57 +++++++++++++++++++++++++++++
 libgcc/config/i386/resms64f.S  | 55 ++++++++++++++++++++++++++++
 libgcc/config/i386/resms64fx.S | 57 +++++++++++++++++++++++++++++
 libgcc/config/i386/resms64x.S  | 59 ++++++++++++++++++++++++++++++
 libgcc/config/i386/savms64.S   | 57 +++++++++++++++++++++++++++++
 libgcc/config/i386/savms64f.S  | 55 ++++++++++++++++++++++++++++
 libgcc/config/i386/t-msabi     |  7 ++++
 9 files changed, 430 insertions(+), 1 deletion(-)
 create mode 100644 libgcc/config/i386/i386-asm.h
 create mode 100644 libgcc/config/i386/resms64.S
 create mode 100644 libgcc/config/i386/resms64f.S
 create mode 100644 libgcc/config/i386/resms64fx.S
 create mode 100644 libgcc/config/i386/resms64x.S
 create mode 100644 libgcc/config/i386/savms64.S
 create mode 100644 libgcc/config/i386/savms64f.S
 create mode 100644 libgcc/config/i386/t-msabi

diff --git a/libgcc/config.host b/libgcc/config.host
index b279a6458f9..b6d10951f3f 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1351,7 +1351,7 @@ case ${host} in
 i[34567]86-*-linux* | x86_64-*-linux* | \
   i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \
   i[34567]86-*-gnu*)
-	tmake_file="${tmake_file} t-tls i386/t-linux t-slibgcc-libgcc"
+	tmake_file="${tmake_file} t-tls i386/t-linux i386/t-msabi t-slibgcc-libgcc"
 	if test "$libgcc_cv_cfi" = "yes"; then
 		tmake_file="${tmake_file} t-stack i386/t-stack-i386"
 	fi
diff --git a/libgcc/config/i386/i386-asm.h b/libgcc/config/i386/i386-asm.h
new file mode 100644
index 00000000000..c613e9fd83d
--- /dev/null
+++ b/libgcc/config/i386/i386-asm.h
@@ -0,0 +1,82 @@
+/* Defines common preprocessor and assembly macros for use by various stubs.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef I386_ASM_H
+#define I386_ASM_H
+
+#ifdef __ELF__
+# define ELFFN(fn) .type fn,@function
+#else
+# define ELFFN(fn)
+#endif
+
+#define FUNC_START(fn)	\
+	.global fn;	\
+	ELFFN (fn);	\
+fn:
+
+#define HIDDEN_FUNC(fn)\
+	FUNC_START (fn)	\
+	.hidden fn;	\
+
+#define FUNC_END(fn) .size fn,.-fn
+
+#ifdef __SSE2__
+# ifdef __AVX__
+#  define MOVAPS vmovaps
+# else
+#  define MOVAPS movaps
+# endif
+
+/* Save SSE registers 6-15. off is the offset from %rax to get to xmm6.  */
+.macro SSE_SAVE off=0
+	MOVAPS %xmm15,(\off - 0x90)(%rax)
+	MOVAPS %xmm14,(\off - 0x80)(%rax)
+	MOVAPS %xmm13,(\off - 0x70)(%rax)
+	MOVAPS %xmm12,(\off - 0x60)(%rax)
+	MOVAPS %xmm11,(\off - 0x50)(%rax)
+	MOVAPS %xmm10,(\off - 0x40)(%rax)
+	MOVAPS %xmm9, (\off - 0x30)(%rax)
+	MOVAPS %xmm8, (\off - 0x20)(%rax)
+	MOVAPS %xmm7, (\off - 0x10)(%rax)
+	MOVAPS %xmm6, \off(%rax)
+.endm
+
+/* Restore SSE registers 6-15. off is the offset from %rsi to get to xmm6.  */
+.macro SSE_RESTORE off=0
+	MOVAPS (\off - 0x90)(%rsi), %xmm15
+	MOVAPS (\off - 0x80)(%rsi), %xmm14
+	MOVAPS (\off - 0x70)(%rsi), %xmm13
+	MOVAPS (\off - 0x60)(%rsi), %xmm12
+	MOVAPS (\off - 0x50)(%rsi), %xmm11
+	MOVAPS (\off - 0x40)(%rsi), %xmm10
+	MOVAPS (\off - 0x30)(%rsi), %xmm9
+	MOVAPS (\off - 0x20)(%rsi), %xmm8
+	MOVAPS (\off - 0x10)(%rsi), %xmm7
+	MOVAPS \off(%rsi), %xmm6
+.endm
+
+#endif /* __SSE2__ */
+#endif /* I386_ASM_H */
diff --git a/libgcc/config/i386/resms64.S b/libgcc/config/i386/resms64.S
new file mode 100644
index 00000000000..f47e2f066fb
--- /dev/null
+++ b/libgcc/config/i386/resms64.S
@@ -0,0 +1,57 @@
+/* Epilogue stub for 64-bit ms/sysv clobbers: restore
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Epilogue routine for restoring 64-bit ms/sysv registers.  */
+
+	.text
+HIDDEN_FUNC(__resms64_18)
+	mov	-0x70(%rsi),%r15
+HIDDEN_FUNC(__resms64_17)
+	mov	-0x68(%rsi),%r14
+HIDDEN_FUNC(__resms64_16)
+	mov	-0x60(%rsi),%r13
+HIDDEN_FUNC(__resms64_15)
+	mov	-0x58(%rsi),%r12
+HIDDEN_FUNC(__resms64_14)
+	mov	-0x50(%rsi),%rbp
+HIDDEN_FUNC(__resms64_13)
+	mov	-0x48(%rsi),%rbx
+HIDDEN_FUNC(__resms64_12)
+	mov	-0x40(%rsi),%rdi
+	SSE_RESTORE off=0x60
+	mov	-0x38(%rsi),%rsi
+	ret
+FUNC_END(__resms64_12)
+FUNC_END(__resms64_13)
+FUNC_END(__resms64_14)
+FUNC_END(__resms64_15)
+FUNC_END(__resms64_16)
+FUNC_END(__resms64_17)
+FUNC_END(__resms64_18)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/resms64f.S b/libgcc/config/i386/resms64f.S
new file mode 100644
index 00000000000..817da60cf15
--- /dev/null
+++ b/libgcc/config/i386/resms64f.S
@@ -0,0 +1,55 @@
+/* Epilogue stub for 64-bit ms/sysv clobbers: restore (with hard frame pointer)
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Epilogue routine for restoring 64-bit ms/sysv registers when hard frame
+   pointer is used.  */
+
+	.text
+HIDDEN_FUNC(__resms64f_17)
+	mov	-0x68(%rsi),%r15
+HIDDEN_FUNC(__resms64f_16)
+	mov	-0x60(%rsi),%r14
+HIDDEN_FUNC(__resms64f_15)
+	mov	-0x58(%rsi),%r13
+HIDDEN_FUNC(__resms64f_14)
+	mov	-0x50(%rsi),%r12
+HIDDEN_FUNC(__resms64f_13)
+	mov	-0x48(%rsi),%rbx
+HIDDEN_FUNC(__resms64f_12)
+	mov	-0x40(%rsi),%rdi
+	SSE_RESTORE off=0x60
+	mov	-0x38(%rsi),%rsi
+	ret
+FUNC_END(__resms64f_12)
+FUNC_END(__resms64f_13)
+FUNC_END(__resms64f_14)
+FUNC_END(__resms64f_15)
+FUNC_END(__resms64f_16)
+FUNC_END(__resms64f_17)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/resms64fx.S b/libgcc/config/i386/resms64fx.S
new file mode 100644
index 00000000000..5dba5848dee
--- /dev/null
+++ b/libgcc/config/i386/resms64fx.S
@@ -0,0 +1,57 @@
+/* Epilogue stub for 64-bit ms/sysv clobbers: restore, leave and return
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Epilogue routine for 64-bit ms/sysv registers when a hard frame pointer
+ * is used -- restores the registers, restores the frame pointer and then
+ * returns from the function.  */
+
+	.text
+HIDDEN_FUNC(__resms64fx_17)
+	mov	-0x68(%rsi),%r15
+HIDDEN_FUNC(__resms64fx_16)
+	mov	-0x60(%rsi),%r14
+HIDDEN_FUNC(__resms64fx_15)
+	mov	-0x58(%rsi),%r13
+HIDDEN_FUNC(__resms64fx_14)
+	mov	-0x50(%rsi),%r12
+HIDDEN_FUNC(__resms64fx_13)
+	mov	-0x48(%rsi),%rbx
+HIDDEN_FUNC(__resms64fx_12)
+	mov	-0x40(%rsi),%rdi
+	SSE_RESTORE off=0x60
+	mov	-0x38(%rsi),%rsi
+	leaveq
+	ret
+FUNC_END(__resms64fx_12)
+FUNC_END(__resms64fx_13)
+FUNC_END(__resms64fx_14)
+FUNC_END(__resms64fx_15)
+FUNC_END(__resms64fx_16)
+FUNC_END(__resms64fx_17)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/resms64x.S b/libgcc/config/i386/resms64x.S
new file mode 100644
index 00000000000..7770447cf38
--- /dev/null
+++ b/libgcc/config/i386/resms64x.S
@@ -0,0 +1,59 @@
+/* Epilogue stub for 64-bit ms/sysv clobbers: restore and return
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Epilogue routine for restoring 64-bit ms/sysv registers and returning from
+ * the function.  */
+
+	.text
+HIDDEN_FUNC(__resms64x_18)
+	mov	-0x70(%rsi),%r15
+HIDDEN_FUNC(__resms64x_17)
+	mov	-0x68(%rsi),%r14
+HIDDEN_FUNC(__resms64x_16)
+	mov	-0x60(%rsi),%r13
+HIDDEN_FUNC(__resms64x_15)
+	mov	-0x58(%rsi),%r12
+HIDDEN_FUNC(__resms64x_14)
+	mov	-0x50(%rsi),%rbp
+HIDDEN_FUNC(__resms64x_13)
+	mov	-0x48(%rsi),%rbx
+HIDDEN_FUNC(__resms64x_12)
+	mov	-0x40(%rsi),%rdi
+	SSE_RESTORE off=0x60
+	mov	-0x38(%rsi),%rsi
+	mov	%r10,%rsp
+	ret
+FUNC_END(__resms64x_12)
+FUNC_END(__resms64x_13)
+FUNC_END(__resms64x_14)
+FUNC_END(__resms64x_15)
+FUNC_END(__resms64x_16)
+FUNC_END(__resms64x_17)
+FUNC_END(__resms64x_18)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/savms64.S b/libgcc/config/i386/savms64.S
new file mode 100644
index 00000000000..2067dd8614f
--- /dev/null
+++ b/libgcc/config/i386/savms64.S
@@ -0,0 +1,57 @@
+/* Prologue stub for 64-bit ms/sysv clobbers: save
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Prologue routine for saving 64-bit ms/sysv registers.  */
+
+	.text
+HIDDEN_FUNC(__savms64_18)
+	mov	%r15,-0x70(%rax)
+HIDDEN_FUNC(__savms64_17)
+	mov	%r14,-0x68(%rax)
+HIDDEN_FUNC(__savms64_16)
+	mov	%r13,-0x60(%rax)
+HIDDEN_FUNC(__savms64_15)
+	mov	%r12,-0x58(%rax)
+HIDDEN_FUNC(__savms64_14)
+	mov	%rbp,-0x50(%rax)
+HIDDEN_FUNC(__savms64_13)
+	mov	%rbx,-0x48(%rax)
+HIDDEN_FUNC(__savms64_12)
+	mov	%rdi,-0x40(%rax)
+	mov	%rsi,-0x38(%rax)
+	SSE_SAVE off=0x60
+	ret
+FUNC_END(__savms64_12)
+FUNC_END(__savms64_13)
+FUNC_END(__savms64_14)
+FUNC_END(__savms64_15)
+FUNC_END(__savms64_16)
+FUNC_END(__savms64_17)
+FUNC_END(__savms64_18)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/savms64f.S b/libgcc/config/i386/savms64f.S
new file mode 100644
index 00000000000..81583b6eb68
--- /dev/null
+++ b/libgcc/config/i386/savms64f.S
@@ -0,0 +1,55 @@
+/* Prologue stub for 64-bit ms/sysv clobbers: save (with hard frame pointer)
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+#include "i386-asm.h"
+
+/* Prologue routine for saving 64-bit ms/sysv registers when realignment is
+ * needed or a hard frame pointer is used.  */
+
+	.text
+HIDDEN_FUNC(__savms64f_17)
+	mov	%r15,-0x68(%rax)
+HIDDEN_FUNC(__savms64f_16)
+	mov	%r14,-0x60(%rax)
+HIDDEN_FUNC(__savms64f_15)
+	mov	%r13,-0x58(%rax)
+HIDDEN_FUNC(__savms64f_14)
+	mov	%r12,-0x50(%rax)
+HIDDEN_FUNC(__savms64f_13)
+	mov	%rbx,-0x48(%rax)
+HIDDEN_FUNC(__savms64f_12)
+	mov	%rdi,-0x40(%rax)
+	mov	%rsi,-0x38(%rax)
+	SSE_SAVE off=0x60
+	ret
+FUNC_END(__savms64f_12)
+FUNC_END(__savms64f_13)
+FUNC_END(__savms64f_14)
+FUNC_END(__savms64f_15)
+FUNC_END(__savms64f_16)
+FUNC_END(__savms64f_17)
+
+#endif /* __x86_64__ */
diff --git a/libgcc/config/i386/t-msabi b/libgcc/config/i386/t-msabi
new file mode 100644
index 00000000000..f9806a611aa
--- /dev/null
+++ b/libgcc/config/i386/t-msabi
@@ -0,0 +1,7 @@
+# Makefile fragment to support -mcall-ms2sysv-xlogues
+LIB2ADD_ST += $(srcdir)/config/i386/savms64.S \
+	      $(srcdir)/config/i386/resms64.S \
+	      $(srcdir)/config/i386/resms64x.S \
+	      $(srcdir)/config/i386/savms64f.S \
+	      $(srcdir)/config/i386/resms64f.S \
+	      $(srcdir)/config/i386/resms64fx.S
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (3 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 03/12] [i386] Use re-aligned stack pointer for aligned SSE movs Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-28  6:00   ` Sandra Loosemore
  2017-04-27  8:05 ` [PATCH 02/12] [i386] Keep stack pointer valid after re-alignment Daniel Santos
                   ` (9 subsequent siblings)
  14 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka, Gerald Pfeifer,
	Joseph Myers, Sandra Loosemore

Adds the option -mcall-ms2sysv-xlogues to i386.opt and i386.c and
documentation to invoke.texi.  Using -mcall-ms2sysv-xlogues on SEH
targets is currently unsupported and results in a sorry ().  SEH
targets can be supported, but that would require adding support for
REG_CFA_EXPRESSION to the SEH unwind emit code in
gcc/config/i386/winnt.c -- the same requirement applies to the use of
aligned SSE MOVs after the stack pointer has been re-aligned.
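
For illustration (a hypothetical example, not part of this patch), the
kind of function the new option applies to -- a 64-bit ms_abi function
making a non-tail call to a sysv_abi function, built with e.g.
-O2 -mcall-ms2sysv-xlogues:

  /* With -mcall-ms2sysv-xlogues, the prologue and epilogue of
     ms_caller call the libgcc stubs instead of saving and restoring
     RSI, RDI and XMM6-15 inline.  */
  extern void __attribute__ ((sysv_abi)) sysv_fn (void);

  void __attribute__ ((ms_abi))
  ms_caller (int n)
  {
    while (n-- > 0)
      sysv_fn ();
  }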

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c   |  6 +++++-
 gcc/config/i386/i386.opt |  4 ++++
 gcc/doc/invoke.texi      | 13 ++++++++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 113f83742c2..521116195cb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4508,7 +4508,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
     { "-mstv",				MASK_STV },
     { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
     { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
-    { "-mprefer-avx128",		MASK_PREFER_AVX128 }
+    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
+    { "-mcall-ms2sysv-xlogues",		MASK_CALL_MS2SYSV_XLOGUES }
   };
 
   /* Additional flag options.  */
@@ -6319,6 +6320,9 @@ ix86_option_override_internal (bool main_args_p,
 #endif
    }
 
+  if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
+    sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
+
   if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9384e29b1de..65b228544a5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -538,6 +538,10 @@ Enum(calling_abi) String(sysv) Value(SYSV_ABI)
 EnumValue
 Enum(calling_abi) String(ms) Value(MS_ABI)
 
+mcall-ms2sysv-xlogues
+Target Report Mask(CALL_MS2SYSV_XLOGUES) Save
+Use libgcc stubs to save and restore registers clobbered by 64-bit Microsoft to System V ABI calls.
+
 mveclibabi=
 Target RejectNegative Joined Var(ix86_veclibabi_type) Enum(ix86_veclibabi) Init(ix86_veclibabi_type_none)
 Vector library ABI to use.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0eeea7b3b87..c9e565a9216 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1209,7 +1209,7 @@ See RS/6000 and PowerPC Options.
 -msse2avx  -mfentry  -mrecord-mcount  -mnop-mcount  -m8bit-idiv @gol
 -mavx256-split-unaligned-load  -mavx256-split-unaligned-store @gol
 -malign-data=@var{type}  -mstack-protector-guard=@var{guard} @gol
--mmitigate-rop  -mgeneral-regs-only}
+-mmitigate-rop  -mgeneral-regs-only  -mcall-ms2sysv-xlogues}
 
 @emph{x86 Windows Options}
 @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
@@ -25308,6 +25308,17 @@ You can control this behavior for specific functions by
 using the function attributes @code{ms_abi} and @code{sysv_abi}.
 @xref{Function Attributes}.
 
+@item -mcall-ms2sysv-xlogues
+@opindex mcall-ms2sysv-xlogues
+@opindex mno-call-ms2sysv-xlogues
+Due to differences in 64-bit ABIs, any Microsoft ABI function that calls a
+System V ABI function must consider RSI, RDI and XMM6-15 as clobbered.  By
+default, the code for saving and restoring these registers is emitted inline,
+resulting in fairly lengthy prologues and epilogues.  Using
+@option{-mcall-ms2sysv-xlogues} emits prologues and epilogues that
+use stubs in the static portion of libgcc to perform these saves and restores,
+thus reducing function size at the cost of a few extra instructions.
+
 @item -mtls-dialect=@var{type}
 @opindex mtls-dialect
 Generate code to access thread-local storage using the @samp{gnu} or
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 01/12] [i386] Re-align stack frame prior to SSE saves.
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (7 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 08/12] [i386] Modify ix86_compute_frame_layout for -mcall-ms2sysv-xlogues Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-27  8:23 ` [PATCH 04/12] [i386] Minor refactoring Daniel Santos
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add new fields to struct ix86_frame to track where we started the stack
re-alignment and what we need to allocate prior to re-alignment.  In
ix86_compute_frame_layout, we do the stack frame re-alignment
computation prior to computing the SSE save area so that we end up with
an aligned SSE save area.

This also ensures that the SSE save area is properly aligned when
DRAP is used.
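
For example (hypothetical numbers), if the GPR save area ends at offset
0x38 and the frame must be re-aligned to 32 bytes, the re-alignment
point becomes ROUND_UP (0x38, 32) = 0x40, and the SSE save area laid
out from that point onward starts 16-byte aligned.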

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d9856573db7..31f69c92968 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2455,7 +2455,7 @@ struct GTY(()) stack_local_entry {
    [saved regs]
 					<- regs_save_offset
    [padding0]
-
+					<- stack_realign_offset
    [saved SSE regs]
 					<- sse_regs_save_offset
    [padding1]          |
@@ -2481,6 +2481,8 @@ struct ix86_frame
   HOST_WIDE_INT stack_pointer_offset;
   HOST_WIDE_INT hfp_save_offset;
   HOST_WIDE_INT reg_save_offset;
+  HOST_WIDE_INT stack_realign_allocate_offset;
+  HOST_WIDE_INT stack_realign_offset;
   HOST_WIDE_INT sse_reg_save_offset;
 
   /* When save_regs_using_mov is set, emit prologue using
@@ -12636,28 +12638,36 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
   if (TARGET_SEH)
     frame->hard_frame_pointer_offset = offset;
 
+  /* When re-aligning the stack frame, but not saving SSE registers, this
+     is the offset we want to adjust the stack pointer to.  */
+  frame->stack_realign_allocate_offset = offset;
+
+  /* The re-aligned stack starts here.  Values before this point are not
+     directly comparable with values below this point.  Use sp_valid_at
+     to determine if the stack pointer is valid for a given offset and
+     fp_valid_at for the frame pointer.  */
+  if (stack_realign_fp)
+    offset = ROUND_UP (offset, stack_alignment_needed);
+  frame->stack_realign_offset = offset;
+
   /* Align and set SSE register save area.  */
   if (frame->nsseregs)
     {
       /* The only ABI that has saved SSE registers (Win64) also has a
-	 16-byte aligned default stack, and thus we don't need to be
-	 within the re-aligned local stack frame to save them.  In case
-	 incoming stack boundary is aligned to less than 16 bytes,
-	 unaligned move of SSE register will be emitted, so there is
-	 no point to round up the SSE register save area outside the
-	 re-aligned local stack frame to 16 bytes.  */
-      if (ix86_incoming_stack_boundary >= 128)
+	 16-byte aligned default stack.  However, many programs violate
+	 the ABI, and Wine64 forces stack realignment to compensate.
+
+	 If the incoming stack boundary is at least 16 bytes, or DRAP is
+	 required and the DRAP re-alignment boundary is at least 16 bytes,
+	 then we want the SSE register save area properly aligned.  */
+      if (ix86_incoming_stack_boundary >= 128
+	       || (stack_realign_drap && stack_alignment_needed >= 16))
 	offset = ROUND_UP (offset, 16);
       offset += frame->nsseregs * 16;
+      frame->stack_realign_allocate_offset = offset;
     }
-  frame->sse_reg_save_offset = offset;
 
-  /* The re-aligned stack starts here.  Values before this point are not
-     directly comparable with values below this point.  In order to make
-     sure that no value happens to be the same before and after, force
-     the alignment computation below to add a non-zero value.  */
-  if (stack_realign_fp)
-    offset = ROUND_UP (offset, stack_alignment_needed);
+  frame->sse_reg_save_offset = offset;
 
   /* Va-arg area */
   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 02/12] [i386] Keep stack pointer valid after re-alignment.
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (4 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-27  8:05 ` [PATCH 10/12] [i386] Add ms2sysv pro/epilogue stubs to libgcc Daniel Santos
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add the fields sp_realigned and sp_realigned_offset to struct
machine_frame_state.  We now have the concept of the stack pointer being
re-aligned rather than invalid.  The inline functions sp_valid_at and
fp_valid_at are added to test if a given location relative to the CFA
can be accessed with the stack or frame pointer, respectively.

Stack allocation prior to re-alignment is modified so that we allocate
only what is needed; we no longer allocate unneeded space when no SSE
registers are saved but frame.sse_reg_save_offset has been increased
for alignment.

As this change only alters how SSE registers are saved, moving the
re-alignment AND instruction should not hinder parallelization of the
integer register saves.
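
For example (hypothetical offsets), if the stack pointer was re-aligned
at CFA offset 0x40, then sp_valid_at (0x60) is true while
sp_valid_at (0x20) is false, so an SSE save below the re-alignment
point is addressed from the re-aligned stack pointer and a GPR save
above it from the frame pointer.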

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 74 +++++++++++++++++++++++++++++++++++++-------------
 gcc/config/i386/i386.h | 11 ++++++++
 2 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 31f69c92968..7923486157d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12783,6 +12783,24 @@ choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
   return len;
 }
 
+/* Determine if the stack pointer is valid for accessing the cfa_offset.  */
+
+static inline bool sp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &fs = cfun->machine->fs;
+  return fs.sp_valid && !(fs.sp_realigned
+			  && cfa_offset < fs.sp_realigned_offset);
+}
+
+/* Determine if the frame pointer is valid for accessing the cfa_offset.  */
+
+static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &fs = cfun->machine->fs;
+  return fs.fp_valid && !(fs.sp_valid && fs.sp_realigned
+			  && cfa_offset >= fs.sp_realigned_offset);
+}
+
 /* Return an RTX that points to CFA_OFFSET within the stack frame.
    The valid base registers are taken from CFUN->MACHINE->FS.  */
 
@@ -13081,15 +13099,18 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
     {
       HOST_WIDE_INT ooffset = m->fs.sp_offset;
       bool valid = m->fs.sp_valid;
+      bool realigned = m->fs.sp_realigned;
 
       if (src == hard_frame_pointer_rtx)
 	{
 	  valid = m->fs.fp_valid;
+	  realigned = false;
 	  ooffset = m->fs.fp_offset;
 	}
       else if (src == crtl->drap_reg)
 	{
 	  valid = m->fs.drap_valid;
+	  realigned = false;
 	  ooffset = 0;
 	}
       else
@@ -13103,6 +13124,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
 
       m->fs.sp_offset = ooffset - INTVAL (offset);
       m->fs.sp_valid = valid;
+      m->fs.sp_realigned = realigned;
     }
 }
 
@@ -13852,6 +13874,7 @@ ix86_expand_prologue (void)
      this is fudged; we're interested to offsets within the local frame.  */
   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
   m->fs.sp_valid = true;
+  m->fs.sp_realigned = false;
 
   ix86_compute_frame_layout (&frame);
 
@@ -14068,11 +14091,10 @@ ix86_expand_prologue (void)
 	 that we must allocate the size of the register save area before
 	 performing the actual alignment.  Otherwise we cannot guarantee
 	 that there's enough storage above the realignment point.  */
-      if (m->fs.sp_offset != frame.sse_reg_save_offset)
+      allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
+      if (allocate)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-				   GEN_INT (m->fs.sp_offset
-					    - frame.sse_reg_save_offset),
-				   -1, false);
+				   GEN_INT (-allocate), -1, false);
 
       /* Align the stack.  */
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
@@ -14080,11 +14102,19 @@ ix86_expand_prologue (void)
 					GEN_INT (-align_bytes)));
 
       /* For the purposes of register save area addressing, the stack
-         pointer is no longer valid.  As for the value of sp_offset,
-	 see ix86_compute_frame_layout, which we need to match in order
-	 to pass verification of stack_pointer_offset at the end.  */
+	 pointer can no longer be used to access anything in the frame
+	 below m->fs.sp_realigned_offset and the frame pointer cannot be
+	 used for anything at or above.  */
       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
-      m->fs.sp_valid = false;
+      m->fs.sp_realigned = true;
+      m->fs.sp_realigned_offset = m->fs.sp_offset - frame.nsseregs * 16;
+      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
+      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
+	 is needed to describe where a register is saved using a realigned
+	 stack pointer, so we need to invalidate the stack pointer for that
+	 target.  */
+      if (TARGET_SEH)
+	m->fs.sp_valid = false;
     }
 
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
@@ -14423,6 +14453,7 @@ ix86_emit_leave (void)
 
   gcc_assert (m->fs.fp_valid);
   m->fs.sp_valid = true;
+  m->fs.sp_realigned = false;
   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
   m->fs.fp_valid = false;
 
@@ -14523,9 +14554,10 @@ ix86_expand_epilogue (int style)
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
 
-  m->fs.sp_valid = (!frame_pointer_needed
-		    || (crtl->sp_is_unchanging
-			&& !stack_realign_fp));
+  m->fs.sp_realigned = stack_realign_fp;
+  m->fs.sp_valid = stack_realign_fp
+		   || !frame_pointer_needed
+		   || crtl->sp_is_unchanging;
   gcc_assert (!m->fs.sp_valid
 	      || m->fs.sp_offset == frame.stack_pointer_offset);
 
@@ -14575,10 +14607,10 @@ ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
     restore_regs_via_mov = false;
-  /* If we're only restoring one register and sp is not valid then
+  /* If we're only restoring one register and sp cannot be used then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.  */
-  else if (!m->fs.sp_valid && frame.nregs <= 1)
+  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
     restore_regs_via_mov = true;
   else if (TARGET_EPILOGUE_USING_MOVE
 	   && cfun->machine->use_fast_prologue_epilogue
@@ -14603,7 +14635,7 @@ ix86_expand_epilogue (int style)
 	 the stack pointer, if we will restore via sp.  */
       if (TARGET_64BIT
 	  && m->fs.sp_offset > 0x7fffffff
-	  && !(m->fs.fp_valid || m->fs.drap_valid)
+	  && !(fp_valid_at (frame.stack_realign_offset) || m->fs.drap_valid)
 	  && (frame.nsseregs + frame.nregs) != 0)
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
@@ -14689,6 +14721,7 @@ ix86_expand_epilogue (int style)
 	    }
 	  m->fs.sp_offset = UNITS_PER_WORD;
 	  m->fs.sp_valid = true;
+	  m->fs.sp_realigned = false;
 	}
     }
   else
@@ -14710,10 +14743,11 @@ ix86_expand_epilogue (int style)
 	}
 
       /* First step is to deallocate the stack frame so that we can
-	 pop the registers.  Also do it on SEH target for very large
-	 frame as the emitted instructions aren't allowed by the ABI in
-	 epilogues.  */
-      if (!m->fs.sp_valid
+	 pop the registers.  If the stack pointer was realigned, it needs
+	 to be restored now.  Also do it on SEH target for very large
+	 frame as the emitted instructions aren't allowed by the ABI
+	 in epilogues.  */
+      if (!m->fs.sp_valid || m->fs.sp_realigned
  	  || (TARGET_SEH
 	      && (m->fs.sp_offset - frame.reg_save_offset
 		  >= SEH_MAX_FRAME_SIZE)))
@@ -14741,7 +14775,8 @@ ix86_expand_epilogue (int style)
     {
       /* If the stack pointer is valid and pointing at the frame
 	 pointer store address, then we only need a pop.  */
-      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
+      if (sp_valid_at (frame.hfp_save_offset)
+	  && m->fs.sp_offset == frame.hfp_save_offset)
 	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
       /* Leave results in shorter dependency chains on CPUs that are
 	 able to grok it fast.  */
@@ -14795,6 +14830,7 @@ ix86_expand_epilogue (int style)
      be possible to merge the local stack deallocation with the
      deallocation forced by ix86_static_chain_on_stack.   */
   gcc_assert (m->fs.sp_valid);
+  gcc_assert (!m->fs.sp_realigned);
   gcc_assert (!m->fs.fp_valid);
   gcc_assert (!m->fs.realigned);
   if (m->fs.sp_offset != UNITS_PER_WORD)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e5f4d857d9..4e4cb7ca7e3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2482,6 +2482,17 @@ struct GTY(()) machine_frame_state
      set, the SP/FP offsets above are relative to the aligned frame
      and not the CFA.  */
   BOOL_BITFIELD realigned : 1;
+
+  /* Indicates whether the stack pointer has been re-aligned.  When set,
+     SP/FP continue to be relative to the CFA, but the stack pointer
+     should only be used for offsets >= sp_realigned_offset, while
+     the frame pointer should be used for offsets < sp_realigned_offset.
+     The flags realigned and sp_realigned are mutually exclusive.  */
+  BOOL_BITFIELD sp_realigned : 1;
+
+  /* If sp_realigned is set, this is the offset from the CFA that the
+     stack pointer was realigned to.  */
+  HOST_WIDE_INT sp_realigned_offset;
 };
 
 /* Private to winnt.c.  */
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
@ 2017-04-27  8:05 Daniel Santos
  2017-04-27  8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
                   ` (14 more replies)
  0 siblings, 15 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, Jan Hubicka

All of these patches are concerned with 64-bit Microsoft ABI functions
that call System V ABI functions, which clobber RSI, RDI and XMM6-15;
they are aimed at improving the performance and .text size of Wine 64.
I had previously submitted these as separate patch sets, but have
combined them for simplicity.  (Does this make the ChangeLogs too big?
Please let me know if you want me to break these back apart.)  Below
are the included patch sets and a summary of changes since the previous
post(s):

1.) PR78962 Use aligned SSE movs for re-aligned MS ABI pro/epilogues. 
https://gcc.gnu.org/ml/gcc-patches/2016-12/msg01859.html

Changes:

  * The SEH unwind emit code (in winnt.c) does not currently support
    REG_CFA_EXPRESSION, which is required to make this work, so I have
    disabled it on SEH targets.
  * Updated comments on REG_CFA_EXPRESSION in winnt.c.


2.) Add option to call out-of-line stubs instead of emitting inline 
saves and restores. https://gcc.gnu.org/ml/gcc-patches/2017-02/msg00548.html

Changes:

  * Renamed option from -moutline-msabi-xlogues to -mcall-ms2sysv-xlogues
  * Since this patch set depends upon aligned SSE MOVs after stack
    realignment, I have disabled it on SEH targets with a sorry().
  * I was previously trying to cache the rtx for symbols to the libgcc
    stubs instead of creating new ones, but this caused problems in
    subsequent passes and it was disabled with a "TODO" comment. I have
    removed this code, as well as the rtx cache that was just wasting
    memory in class xlogue_layout.
  * Improved comment documentation.


3.) A comprehensive test program to validate correct behavior in these 
pro- and epilogues. https://gcc.gnu.org/ml/gcc-patches/2017-02/msg00542.html

Changes:

  * The previous version repeated all tests for each -j<jobs> instead of
    running in parallel.  I have fixed this by implementing a primitive but
    effective file-based parallelization scheme.
  * I noticed that there was a gcc/testsuite/gcc.target/x86_64/abi
    directory for tests specific to 64-bit ABI issues, so I've
    moved my tests to an "ms-sysv" subdirectory of that (instead of
    gcc/testsuite/gcc.target/i386/msabi).
  * Fixed breakages on Cygwin.
  * Corrected a bad "_noinfo" optimization barrier (function call by
    volatile pointer).
  * Minor cleanup/improvements.


  gcc/Makefile.in                                    |   2 +
  gcc/config/i386/i386.c                             | 916 +++++++++++++++++++--
  gcc/config/i386/i386.h                             |  33 +-
  gcc/config/i386/i386.opt                           |   4 +
  gcc/config/i386/predicates.md                      | 155 ++++
  gcc/config/i386/sse.md                             |  37 +
  gcc/config/i386/winnt.c                            |   3 +-
  gcc/doc/invoke.texi                                |  13 +-
  .../gcc.target/x86_64/abi/ms-sysv/do-test.S        | 163 ++++
  gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc | 807 ++++++++++++++++++
  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.c        | 373 +++++++++
  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp      | 178 ++++
  libgcc/config.host                                 |   2 +-
  libgcc/config/i386/i386-asm.h                      |  82 ++
  libgcc/config/i386/resms64.S                       |  57 ++
  libgcc/config/i386/resms64f.S                      |  55 ++
  libgcc/config/i386/resms64fx.S                     |  57 ++
  libgcc/config/i386/resms64x.S                      |  59 ++
  libgcc/config/i386/savms64.S                       |  57 ++
  libgcc/config/i386/savms64f.S                      |  55 ++
  libgcc/config/i386/t-msabi                         |   7 +
  21 files changed, 3020 insertions(+), 95 deletions(-)	


gcc/ChangeLog:

2017-04-25  Daniel Santos  <daniel.santos@pobox.com>

	* config/i386/i386.opt: Add option -mcall-ms2sysv-xlogues.
	* config/i386/i386.h
	(x86_64_ms_sysv_extra_clobbered_registers): Change type to unsigned.
	(NUM_X86_64_MS_CLOBBERED_REGS): New macro.
	(struct machine_function): Add new members call_ms2sysv,
	call_ms2sysv_pad_in, call_ms2sysv_pad_out and call_ms2sysv_extra_regs.
	(struct machine_frame_state): New fields sp_realigned and
	sp_realigned_offset.
	* config/i386/i386.c
	(enum xlogue_stub): New enum.
	(enum xlogue_stub_sets): New enum.
	(class xlogue_layout): New class.
	(struct ix86_frame): New fields stack_realign_allocate_offset,
	stack_realign_offset and outlined_save_offset.  Modify comments to
	detail stack layout when using out-of-line stubs.
	(ix86_target_string): Add -mcall-ms2sysv-xlogues option.
	(ix86_option_override_internal): Add sorry() for TARGET_SEH and
	-mcall-ms2sysv-xlogues.
	(stub_managed_regs): New static variable.
	(ix86_save_reg): Add new parameter ignore_outlined to optionally omit
	registers managed by out-of-line stub.
	(disable_call_ms2sysv_xlogues): New function.
	(ix86_compute_frame_layout): Modify re-alignment calculations, disable
	m->call_ms2sysv when appropriate and compute frame layout for
	out-of-line stubs.
	(sp_valid_at, fp_valid_at): New inline functions.
	(choose_basereg): New function.
	(choose_baseaddr): Add align parameter, use choose_basereg and modify
	all callers.
	(ix86_emit_save_reg_using_mov, ix86_emit_restore_sse_regs_using_mov):
	Use align parameter of choose_baseaddr to generate aligned SSE movs
	when possible.
	(pro_epilogue_adjust_stack): Modify to track
	machine_frame_state::sp_realigned.
	(ix86_nsaved_regs): Modify to accommodate changes to ix86_save_reg.
	(ix86_nsaved_sseregs): Likewise.
	(ix86_emit_save_regs): Likewise.
	(ix86_emit_save_regs_using_mov): Likewise.
	(ix86_emit_save_sse_regs_using_mov): Likewise.
	(get_scratch_register_on_entry): Likewise.
	(gen_frame_set): New function.
	(gen_frame_load): Likewise.
	(gen_frame_store): Likewise.
	(emit_outlined_ms2sysv_save): Likewise.
	(emit_outlined_ms2sysv_restore): Likewise.
	(ix86_expand_prologue): Modify stack re-alignment code and call
	emit_outlined_ms2sysv_save when appropriate.
	(ix86_emit_leave): Clear machine_frame_state::sp_realigned.  Add
	parameter rtx_insn *insn, which allows the function to be used to only
	generate the notes.
	(ix86_expand_epilogue): Modify validity checks of frame and stack
	pointers, and call emit_outlined_ms2sysv_restore when appropriate.
	(ix86_expand_call): Modify to enable m->call_ms2sysv when appropriate.
	* config/i386/predicates.md
	(save_multiple): New predicate.
	(restore_multiple): Likewise.
	* config/i386/sse.md
	(save_multiple<mode>): New pattern.
	(save_multiple_realign<mode>): Likewise.
	(restore_multiple<mode>): Likewise.
	(restore_multiple_and_return<mode>): Likewise.
	(restore_multiple_leave_return<mode>): Likewise.
	* Makefile.in: Export HOSTCXX and HOSTCXXFLAGS to site.exp

libgcc/ChangeLog:

2017-04-25  Daniel Santos  <daniel.santos@pobox.com>

	* config.host: Add i386/t-msabi to i386/t-linux file list.
	* config/i386/i386-asm.h: New file.
	* config/i386/resms64.S: New file.
	* config/i386/resms64f.S: New file.
	* config/i386/resms64fx.S: New file.
	* config/i386/resms64x.S: New file.
	* config/i386/savms64.S: New file.
	* config/i386/savms64f.S: New file.
	* config/i386/t-msabi: New file.

gcc/testsuite/ChangeLog:

2017-04-25  Daniel Santos  <daniel.santos@pobox.com>

	* gcc.target/x86_64/abi/ms-sysv/do-test.S: New file.
	* gcc.target/x86_64/abi/ms-sysv/gen.cc: Likewise.
	* gcc.target/x86_64/abi/ms-sysv/ms-sysv.c: Likewise.
	* gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp: Likewise.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 08/12] [i386] Modify ix86_compute_frame_layout for -mcall-ms2sysv-xlogues
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (6 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 10/12] [i386] Add ms2sysv pro/epilogue stubs to libgcc Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-27  8:05 ` [PATCH 01/12] [i386] Re-align stack frame prior to SSE saves Daniel Santos
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

ix86_compute_frame_layout will now populate fields added to structs
machine_function and ix86_frame and modify the frame layout specifics to
facilitate the use of save and restore stubs.  This is also where we
initialize stub_managed_regs to track which register saves and restores
are managed by the out-of-line stub and which are managed inline, as
it is possible to have registers being managed both inline and
out-of-line when inline asm explicitly clobbers a register.
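
For example, per the register ordering in the stub save area (see the
updated stack layout comment below), if a function clobbers R13 but not
R12, R13 cannot be part of the stub-managed set and is saved and
restored inline, while RSI, RDI and XMM6-15 are still handled by the
stub.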

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 90 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4f0cb7dd6cc..debfe457d97 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2715,12 +2715,29 @@ struct GTY(()) stack_local_entry {
    saved frame pointer			if frame_pointer_needed
 					<- HARD_FRAME_POINTER
    [saved regs]
-					<- regs_save_offset
+					<- reg_save_offset
    [padding0]
 					<- stack_realign_offset
    [saved SSE regs]
+	OR
+   [stub-saved registers for ms x64 --> sysv clobbers
+			<- Start of out-of-line, stub-saved/restored regs
+			   (see libgcc/config/i386/(sav|res)ms64*.S)
+     [XMM6-15]
+     [RSI]
+     [RDI]
+     [?RBX]		only if RBX is clobbered
+     [?RBP]		only if RBP and RBX are clobbered
+     [?R12]		only if R12 and all previous regs are clobbered
+     [?R13]		only if R13 and all previous regs are clobbered
+     [?R14]		only if R14 and all previous regs are clobbered
+     [?R15]		only if R15 and all previous regs are clobbered
+			<- end of stub-saved/restored regs
+     [padding1]
+   ]
+					<- outlined_save_offset
 					<- sse_regs_save_offset
-   [padding1]          |
+   [padding2]
 		       |		<- FRAME_POINTER
    [va_arg registers]  |
 		       |
@@ -2745,6 +2762,7 @@ struct ix86_frame
   HOST_WIDE_INT reg_save_offset;
   HOST_WIDE_INT stack_realign_allocate_offset;
   HOST_WIDE_INT stack_realign_offset;
+  HOST_WIDE_INT outlined_save_offset;
   HOST_WIDE_INT sse_reg_save_offset;
 
   /* When save_regs_using_mov is set, emit prologue using
@@ -12802,6 +12820,15 @@ ix86_builtin_setjmp_frame_value (void)
   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
 }
 
+/* Disables out-of-line ms2sysv pro/epilogues and emits a warning giving
+   FEATURE as the reason.  */
+static void disable_call_ms2sysv_xlogues (const char *feature)
+{
+  cfun->machine->call_ms2sysv = false;
+  warning (OPT_mcall_ms2sysv_xlogues, "not currently compatible with %s.",
+	   feature);
+}
+
 /* When using -fsplit-stack, the allocation routines set a field in
    the TCB to the bottom of the stack plus this much space, measured
    in bytes.  */
@@ -12820,9 +12847,50 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
   HOST_WIDE_INT size = get_frame_size ();
   HOST_WIDE_INT to_allocate;
 
+  CLEAR_HARD_REG_SET (stub_managed_regs);
+
+  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
+     ms_abi functions that call a sysv function.  We now need to prune away
+     cases where it should be disabled.  */
+  if (TARGET_64BIT && m->call_ms2sysv)
+  {
+    gcc_assert (TARGET_64BIT_MS_ABI);
+    gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
+    gcc_assert (!TARGET_SEH);
+
+    if (!TARGET_SSE)
+      m->call_ms2sysv = false;
+
+    /* Don't break hot-patched functions.  */
+    else if (ix86_function_ms_hook_prologue (current_function_decl))
+      m->call_ms2sysv = false;
+
+    /* TODO: Cases not yet examined.  */
+    else if (crtl->calls_eh_return)
+      disable_call_ms2sysv_xlogues ("__builtin_eh_return");
+
+    else if (ix86_static_chain_on_stack)
+      disable_call_ms2sysv_xlogues ("static call chains");
+
+    else if (ix86_using_red_zone ())
+      disable_call_ms2sysv_xlogues ("red zones");
+
+    else if (flag_split_stack)
+      disable_call_ms2sysv_xlogues ("split stack");
+
+    /* Finally, compute which registers the stub will manage.  */
+    else
+      {
+	unsigned count = xlogue_layout
+			 ::compute_stub_managed_regs (stub_managed_regs);
+	m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
+      }
+  }
+
   frame->nregs = ix86_nsaved_regs ();
   frame->nsseregs = ix86_nsaved_sseregs ();
-  CLEAR_HARD_REG_SET (stub_managed_regs);
+  m->call_ms2sysv_pad_in = 0;
+  m->call_ms2sysv_pad_out = 0;
 
   /* 64-bit MS ABI seem to require stack alignment to be always 16,
      except for function prologues, leaf functions and when the defult
@@ -12926,8 +12994,26 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
     offset = ROUND_UP (offset, stack_alignment_needed);
   frame->stack_realign_offset = offset;
 
+  if (TARGET_64BIT && m->call_ms2sysv)
+    {
+      gcc_assert (stack_alignment_needed >= 16);
+      gcc_assert (!frame->nsseregs);
+
+      m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
+
+      /* Select an appropriate layout for incoming stack offset.  */
+      const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+
+      if ((offset + xlogue.get_stack_space_used ()) & UNITS_PER_WORD)
+	m->call_ms2sysv_pad_out = 1;
+
+      offset += xlogue.get_stack_space_used ();
+      gcc_assert (!(offset & 0xf));
+      frame->outlined_save_offset = offset;
+    }
+
   /* Align and set SSE register save area.  */
-  if (frame->nsseregs)
+  else if (frame->nsseregs)
     {
       /* The only ABI that has saved SSE registers (Win64) also has a
 	 16-byte aligned default stack.  However, many programs violate
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
  2017-04-27  8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
  2017-04-27  8:05 ` [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-05-01 11:18   ` Uros Bizjak
  2017-05-04 21:35   ` [PATCH 09/12 rev1] [i386] Add patterns and predicates mcall-ms2sysv-xlogues Daniel Santos
  2017-04-27  8:05 ` [PATCH 03/12] [i386] Use re-aligned stack pointer for aligned SSE movs Daniel Santos
                   ` (11 subsequent siblings)
  14 siblings, 2 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Adds the predicates save_multiple and restore_multiple to predicates.md,
which are used by the following patterns in sse.md:

* save_multiple - insn that calls a save stub
* restore_multiple - call_insn that calls a restore stub and returns to the
  function to allow a sibling call (which should typically offer better
  optimization than the restore stub as the tail call)
* restore_multiple_and_return - a jump_insn that returns from the
  function as a tail-call.
* restore_multiple_leave_return - like the above, but restores the frame
  pointer before returning.
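
For example, the prologue is expected to emit a save_multiple parallel
whose first element is a (use) of the __savms64* stub symbol, followed
by sets that store each stub-managed register at an offset from RAX;
the predicates above verify that shape.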

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md        |  37 ++++++++++
 2 files changed, 192 insertions(+)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8f250a2e720..36fe8abc3f4 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1657,3 +1657,158 @@
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
 	    (match_test "op == constm1_rtx"))))
+
+;; Return true if:
+;; 1. first op is a symbol reference,
+;; 2. >= 13 operands, and
+;; 3. operands 2 to end are one of:
+;;   a. save a register to a memory location, or
+;;   b. restore stack pointer.
+(define_predicate "save_multiple"
+  (match_code "parallel")
+{
+  const unsigned nregs = XVECLEN (op, 0);
+  rtx head = XVECEXP (op, 0, 0);
+  unsigned i;
+
+  if (GET_CODE (head) != USE)
+    return false;
+  else
+    {
+      rtx op0 = XEXP (head, 0);
+      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
+	return false;
+    }
+
+  if (nregs < 13)
+    return false;
+
+  for (i = 2; i < nregs; i++)
+    {
+      rtx e, src, dest;
+
+      e = XVECEXP (op, 0, i);
+
+      switch (GET_CODE (e))
+	{
+	  case SET:
+	    src  = SET_SRC (e);
+	    dest = SET_DEST (e);
+
+	    /* storing a register to memory.  */
+	    if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
+	      {
+		rtx addr = XEXP (dest, 0);
+
+		/* Good if dest address is in RAX.  */
+		if (GET_CODE (addr) == REG
+		    && REGNO (addr) == AX_REG)
+		  continue;
+
+		/* Good if dest address is offset of RAX.  */
+		if (GET_CODE (addr) == PLUS
+		    && GET_CODE (XEXP (addr, 0)) == REG
+		    && REGNO (XEXP (addr, 0)) == AX_REG)
+		  continue;
+	      }
+	    break;
+
+	  default:
+	    break;
+	}
+	return false;
+    }
+  return true;
+})
+
+;; Return true if:
+;; * first op is (return) or a use (symbol reference),
+;; * >= 14 operands, and
+;; * operands 2 to end are one of:
+;;   - restoring a register from a memory location that's an offset of RSI.
+;;   - clobbering a reg
+;;   - adjusting SP
+(define_predicate "restore_multiple"
+  (match_code "parallel")
+{
+  const unsigned nregs = XVECLEN (op, 0);
+  rtx head = XVECEXP (op, 0, 0);
+  unsigned i;
+
+  switch (GET_CODE (head))
+    {
+      case RETURN:
+	i = 3;
+	break;
+
+      case USE:
+      {
+	rtx op0 = XEXP (head, 0);
+
+	if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
+	  return false;
+
+	i = 1;
+	break;
+      }
+
+      default:
+	return false;
+    }
+
+  if (nregs < i + 12)
+    return false;
+
+  for (; i < nregs; i++)
+    {
+      rtx e, src, dest;
+
+      e = XVECEXP (op, 0, i);
+
+      switch (GET_CODE (e))
+	{
+	  case CLOBBER:
+	    continue;
+
+	  case SET:
+	    src  = SET_SRC (e);
+	    dest = SET_DEST (e);
+
+	    /* Restoring a register from memory.  */
+	    if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
+	      {
+		rtx addr = XEXP (src, 0);
+
+		/* Good if src address is in RSI.  */
+		if (GET_CODE (addr) == REG
+		    && REGNO (addr) == SI_REG)
+		  continue;
+
+		/* Good if src address is offset of RSI.  */
+		if (GET_CODE (addr) == PLUS
+		    && GET_CODE (XEXP (addr, 0)) == REG
+		    && REGNO (XEXP (addr, 0)) == SI_REG)
+		  continue;
+
+		/* Good if adjusting stack pointer.  */
+		if (GET_CODE (dest) == REG
+		    && REGNO (dest) == SP_REG
+		    && GET_CODE (src) == PLUS
+		    && GET_CODE (XEXP (src, 0)) == REG
+		    && REGNO (XEXP (src, 0)) == SP_REG)
+		  continue;
+	      }
+
+	    /* Restoring stack pointer from another register.  */
+	    if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
+		&& GET_CODE (src) == REG)
+	      continue;
+	    break;
+
+	  default:
+	    break;
+	}
+	return false;
+    }
+  return true;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e8ccb1e10c3..c9fe7274def 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -19997,3 +19997,40 @@
           (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512VPOPCNTDQ"
   "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+;; Save multiple registers out-of-line.
+(define_insn "save_multiple<mode>"
+  [(match_parallel 0 "save_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))
+     (const_int 0)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line.
+(define_insn "restore_multiple<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line and return.
+(define_insn "restore_multiple_and_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (const_int 0)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")
+
+;; Restore multiple registers out-of-line when the hard frame pointer is used,
+;; and perform the leave operation prior to returning from the function.
+(define_insn "restore_multiple_leave_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (const_int 1)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 03/12] [i386] Use re-aligned stack pointer for aligned SSE movs
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (2 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-04-27  8:05 ` [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues Daniel Santos
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add an optional `align' parameter to choose_baseaddr, allowing the
caller to request an address that is aligned to some boundary.  Modify
ix86_emit_save_regs_using_mov and ix86_emit_restore_regs_using_mov to use
optimally aligned memory when such a base register is available.
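
For illustration, a minimal case that exercises the new path might look
like the following (a sketch, not part of the patch; sysv_callee stands in
for any System V ABI function):

  /* An ms_abi function that realigns its stack and calls a sysv_abi
     function must save/restore XMM6-XMM15; with this change those SSE
     moves can be aligned relative to the realigned stack pointer.  */
  extern long sysv_callee (long a, long b);

  __attribute__ ((ms_abi, force_align_arg_pointer))
  long ms_caller (long a, long b)
  {
    return sysv_callee (a, b);
  }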

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c  | 111 ++++++++++++++++++++++++++++++++++++++----------
 gcc/config/i386/winnt.c |   3 +-
 2 files changed, 90 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7923486157d..e8a4ba6fe8d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12801,15 +12801,39 @@ static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset)
 			  && cfa_offset >= fs.sp_realigned_offset);
 }
 
-/* Return an RTX that points to CFA_OFFSET within the stack frame.
-   The valid base registers are taken from CFUN->MACHINE->FS.  */
+/* Choose a base register based upon alignment requested, speed and/or
+   size.  */
 
-static rtx
-choose_baseaddr (HOST_WIDE_INT cfa_offset)
+static void choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
+			    HOST_WIDE_INT &base_offset,
+			    unsigned int align_reqested, unsigned int *align)
 {
   const struct machine_function *m = cfun->machine;
-  rtx base_reg = NULL;
-  HOST_WIDE_INT base_offset = 0;
+  unsigned int hfp_align;
+  unsigned int drap_align;
+  unsigned int sp_align;
+  bool hfp_ok  = fp_valid_at (cfa_offset);
+  bool drap_ok = m->fs.drap_valid;
+  bool sp_ok   = sp_valid_at (cfa_offset);
+
+  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
+
+  /* Filter out any registers that don't meet the requested alignment
+     criteria.  */
+  if (align_reqested)
+    {
+      if (m->fs.realigned)
+	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
+      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
+	 notes (which we would need to use a realigned stack pointer),
+	 so disable on SEH targets.  */
+      else if (m->fs.sp_realigned)
+	sp_align = crtl->stack_alignment_needed;
+
+      hfp_ok = hfp_ok && hfp_align >= align_reqested;
+      drap_ok = drap_ok && drap_align >= align_reqested;
+      sp_ok = sp_ok && sp_align >= align_reqested;
+    }
 
   if (m->use_fast_prologue_epilogue)
     {
@@ -12818,17 +12842,17 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset)
          while DRAP must be reloaded within the epilogue.  But choose either
          over the SP due to increased encoding size.  */
 
-      if (m->fs.fp_valid)
+      if (hfp_ok)
 	{
 	  base_reg = hard_frame_pointer_rtx;
 	  base_offset = m->fs.fp_offset - cfa_offset;
 	}
-      else if (m->fs.drap_valid)
+      else if (drap_ok)
 	{
 	  base_reg = crtl->drap_reg;
 	  base_offset = 0 - cfa_offset;
 	}
-      else if (m->fs.sp_valid)
+      else if (sp_ok)
 	{
 	  base_reg = stack_pointer_rtx;
 	  base_offset = m->fs.sp_offset - cfa_offset;
@@ -12841,13 +12865,13 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset)
 
       /* Choose the base register with the smallest address encoding.
          With a tie, choose FP > DRAP > SP.  */
-      if (m->fs.sp_valid)
+      if (sp_ok)
 	{
 	  base_reg = stack_pointer_rtx;
 	  base_offset = m->fs.sp_offset - cfa_offset;
           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
 	}
-      if (m->fs.drap_valid)
+      if (drap_ok)
 	{
 	  toffset = 0 - cfa_offset;
 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
@@ -12858,7 +12882,7 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset)
 	      len = tlen;
 	    }
 	}
-      if (m->fs.fp_valid)
+      if (hfp_ok)
 	{
 	  toffset = m->fs.fp_offset - cfa_offset;
 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
@@ -12870,8 +12894,40 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset)
 	    }
 	}
     }
-  gcc_assert (base_reg != NULL);
 
+  /* Set the align return value.  */
+  if (align)
+    {
+      if (base_reg == stack_pointer_rtx)
+	*align = sp_align;
+      else if (base_reg == crtl->drap_reg)
+	*align = drap_align;
+      else if (base_reg == hard_frame_pointer_rtx)
+	*align = hfp_align;
+    }
+}
+
+/* Return an RTX that points to CFA_OFFSET within the stack frame and
+   the alignment of address.  If align is non-null, it should point to
+   an alignment value (in bits) that is preferred or zero and will
+   receive the alignment of the base register that was selected.  The
+   valid base registers are taken from CFUN->MACHINE->FS.  */
+
+static rtx
+choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
+{
+  rtx base_reg = NULL;
+  HOST_WIDE_INT base_offset = 0;
+
+  /* If a specific alignment is requested, try to get a base register
+     with that alignment first.  */
+  if (align && *align)
+    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
+
+  if (!base_reg)
+    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
+
+  gcc_assert (base_reg != NULL);
   return plus_constant (Pmode, base_reg, base_offset);
 }
 
@@ -12900,13 +12956,14 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
   struct machine_function *m = cfun->machine;
   rtx reg = gen_rtx_REG (mode, regno);
   rtx mem, addr, base, insn;
-  unsigned int align;
+  unsigned int align = GET_MODE_ALIGNMENT (mode);
 
-  addr = choose_baseaddr (cfa_offset);
+  addr = choose_baseaddr (cfa_offset, &align);
   mem = gen_frame_mem (mode, addr);
 
-  /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
-  align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
+  /* The location alignment depends upon the base register.  */
+  align = MIN (GET_MODE_ALIGNMENT (mode), align);
+  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
   set_mem_align (mem, align);
 
   insn = emit_insn (gen_rtx_SET (mem, reg));
@@ -12946,6 +13003,13 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
 	}
     }
 
+  else if (base == stack_pointer_rtx && m->fs.sp_realigned
+	   && cfa_offset >= m->fs.sp_realigned_offset)
+    {
+      gcc_checking_assert (stack_realign_fp);
+      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
+    }
+
   /* The memory may not be relative to the current CFA register,
      which means that we may need to generate a new pattern for
      use by the unwind info.  */
@@ -14350,7 +14414,7 @@ ix86_expand_prologue (void)
       /* vDRAP is setup but after reload it turns out stack realign
          isn't necessary, here we will emit prologue to setup DRAP
          without stack realign adjustment */
-      t = choose_baseaddr (0);
+      t = choose_baseaddr (0, NULL);
       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
     }
 
@@ -14487,7 +14551,7 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
 	rtx mem;
 	rtx_insn *insn;
 
-	mem = choose_baseaddr (cfa_offset);
+	mem = choose_baseaddr (cfa_offset, NULL);
 	mem = gen_frame_mem (word_mode, mem);
 	insn = emit_move_insn (reg, mem);
 
@@ -14524,13 +14588,14 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       {
 	rtx reg = gen_rtx_REG (V4SFmode, regno);
 	rtx mem;
-	unsigned int align;
+	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
 
-	mem = choose_baseaddr (cfa_offset);
+	mem = choose_baseaddr (cfa_offset, &align);
 	mem = gen_rtx_MEM (V4SFmode, mem);
 
-	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
-	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
+	/* The location alignment depends upon the base register.  */
+	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
+	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
 	set_mem_align (mem, align);
 	emit_insn (gen_rtx_SET (reg, mem));
 
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index f89e7d00fe2..8272c7fddc1 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -1128,7 +1128,8 @@ i386_pe_seh_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
 
 	case REG_CFA_DEF_CFA:
 	case REG_CFA_EXPRESSION:
-	  /* Only emitted with DRAP, which we disable.  */
+	  /* Only emitted with DRAP and aligned memory access using a
+	     realigned SP, both of which we disable.  */
 	  gcc_unreachable ();
 	  break;
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls.
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-05-17  9:52   ` Thomas Preudhomme
  2017-04-27  8:05 ` [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation Daniel Santos
                   ` (13 subsequent siblings)
  14 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka, Mike Stump

Add a comprehensive test program for x86_64 ms_abi functions that call
sysv_abi functions, to help validate -mcall-ms2sysv-xlogues and the use of
aligned SSE MOVs after a (non-DRAP) realigned stack.
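
For reference, each generated ms_abi test function is roughly of the
following shape (an illustrative sketch only; actual names encode the
clobber mask and variant letters, and the body changes per variant):

  /* Hypothetical variant: clobber mask 0x05 (rbx, r12), two extra long
     parameters, no alloca/varargs/sibcall/shrink-wrap.  sysv_2_noinfo is
     the generated volatile pointer to the matching sysv_abi helper.  */
  __attribute__ ((noinline, ms_abi)) long msabi_05_2 (long a, long b)
  {
    __asm__ __volatile__ ("" ::: "rbx", "r12");
    return sysv_2_noinfo (a, b);
  }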

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/Makefile.in                                    |   2 +
 .../gcc.target/x86_64/abi/ms-sysv/do-test.S        | 163 +++++
 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc | 807 +++++++++++++++++++++
 .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.c        | 373 ++++++++++
 .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp      | 178 +++++
 5 files changed, 1523 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f675e073ecc..7f7c238127b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3807,7 +3807,9 @@ site.exp: ./config.status Makefile
 	@echo "set CFLAGS \"\"" >> ./site.tmp
 	@echo "set CXXFLAGS \"\"" >> ./site.tmp
 	@echo "set HOSTCC \"$(CC)\"" >> ./site.tmp
+	@echo "set HOSTCXX \"$(CXX)\"" >> ./site.tmp
 	@echo "set HOSTCFLAGS \"$(CFLAGS)\"" >> ./site.tmp
+	@echo "set HOSTCXXFLAGS \"$(CXXFLAGS)\"" >> ./site.tmp
 # TEST_ALWAYS_FLAGS are flags that should be passed to every compilation.
 # They are passed first to allow individual tests to override them.
 	@echo "set TEST_ALWAYS_FLAGS \"$(SYSROOT_CFLAGS_FOR_TARGET)\"" >> ./site.tmp
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S
new file mode 100644
index 00000000000..1395235fd1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S
@@ -0,0 +1,163 @@
+/* Assembly proxy functions for ms_abi tests.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __x86_64__
+
+# ifdef __ELF__
+#  define ELFFN_BEGIN(fn)       .type fn,@function
+#  define ELFFN_END(fn)         .size fn,.-fn
+# else
+#  define ELFFN_BEGIN(fn)
+#  define ELFFN_END(fn)
+# endif
+
+# define FUNC(fn)		\
+	.global fn;		\
+	ELFFN_BEGIN(fn);	\
+fn:
+
+#define FUNC_END(fn) ELFFN_END(fn)
+
+# ifdef __AVX__
+#  define MOVAPS vmovaps
+# else
+#  define MOVAPS movaps
+# endif
+
+/* TODO: Is there a cleaner way to provide these offsets?  */
+	.struct 0
+test_data_save:
+
+	.struct test_data_save + 224
+test_data_input:
+
+	.struct test_data_save + 448
+test_data_output:
+
+	.struct test_data_save + 672
+test_data_fn:
+
+	.struct test_data_save + 680
+test_data_retaddr:
+
+	.text
+
+regs_to_mem:
+	MOVAPS	%xmm6, (%rax)
+	MOVAPS	%xmm7, 0x10(%rax)
+	MOVAPS	%xmm8, 0x20(%rax)
+	MOVAPS	%xmm9, 0x30(%rax)
+	MOVAPS	%xmm10, 0x40(%rax)
+	MOVAPS	%xmm11, 0x50(%rax)
+	MOVAPS	%xmm12, 0x60(%rax)
+	MOVAPS	%xmm13, 0x70(%rax)
+	MOVAPS	%xmm14, 0x80(%rax)
+	MOVAPS	%xmm15, 0x90(%rax)
+	mov	%rsi, 0xa0(%rax)
+	mov	%rdi, 0xa8(%rax)
+	mov	%rbx, 0xb0(%rax)
+	mov	%rbp, 0xb8(%rax)
+	mov	%r12, 0xc0(%rax)
+	mov	%r13, 0xc8(%rax)
+	mov	%r14, 0xd0(%rax)
+	mov	%r15, 0xd8(%rax)
+	retq
+
+mem_to_regs:
+	MOVAPS	(%rax), %xmm6
+	MOVAPS	0x10(%rax),%xmm7
+	MOVAPS	0x20(%rax),%xmm8
+	MOVAPS	0x30(%rax),%xmm9
+	MOVAPS	0x40(%rax),%xmm10
+	MOVAPS	0x50(%rax),%xmm11
+	MOVAPS	0x60(%rax),%xmm12
+	MOVAPS	0x70(%rax),%xmm13
+	MOVAPS	0x80(%rax),%xmm14
+	MOVAPS	0x90(%rax),%xmm15
+	mov	0xa0(%rax),%rsi
+	mov	0xa8(%rax),%rdi
+	mov	0xb0(%rax),%rbx
+	mov	0xb8(%rax),%rbp
+	mov	0xc0(%rax),%r12
+	mov	0xc8(%rax),%r13
+	mov	0xd0(%rax),%r14
+	mov	0xd8(%rax),%r15
+	retq
+
+# NOTE: Not MT safe
+FUNC(do_test_unaligned)
+	.cfi_startproc
+	# The alignment checks below verify the correctness of the test
+	# itself.
+
+	# Verify that incoming stack is aligned + 8
+	pushf
+	test	$0x8, %rsp
+	jne	L0
+	int	$3		# Stack not unaligned
+
+FUNC(do_test_aligned)
+	# Verify that incoming stack is aligned
+	pushf
+	test	$0xf, %rsp
+	je	L0
+	int	$3		# Stack not aligned
+L0:
+	popf
+
+	# Save registers
+	lea	test_data(%rip), %rax
+	call	regs_to_mem
+
+	# Load register with random data
+	lea	test_data + test_data_input(%rip), %rax
+	call	mem_to_regs
+
+	# Save original return address
+	pop	%rax
+	movq    %rax, test_data + test_data_retaddr(%rip)
+
+	# Call the test function
+	call	*test_data + test_data_fn(%rip)
+
+	# Restore the original return address
+	movq    test_data + test_data_retaddr(%rip), %rcx
+	push	%rcx
+
+	# Save test function return value and store resulting register values
+	push	%rax
+	lea	test_data + test_data_output(%rip), %rax
+	call	regs_to_mem
+
+	# Restore registers
+	lea	test_data(%rip), %rax
+	call	mem_to_regs
+	pop	%rax
+	retq
+        .cfi_endproc
+FUNC_END(do_test_aligned)
+FUNC_END(do_test_unaligned)
+
+#endif /* __x86_64__ */
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc
new file mode 100644
index 00000000000..947a12bf2ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc
@@ -0,0 +1,807 @@
+/* Test program generator for 64-bit Microsoft ABI.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <cstdio>
+#include <cassert>
+#include <vector>
+#include <string>
+#include <cstring>
+#include <iostream>
+#include <algorithm>
+#include <ios>
+#include <iomanip>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <regex>
+#include <stdexcept>
+
+#include <unistd.h>
+#include <getopt.h>
+
+using namespace std;
+
+/* A basic Effective C++ Item 6. */
+class uncopyable
+{
+private:
+  uncopyable (const uncopyable &) = delete;
+  const uncopyable& operator= (const uncopyable &) = delete;
+
+protected:
+  uncopyable() {}
+  ~uncopyable() {}
+};
+
+/* A simple class for adding text delimiters.  */
+class list_delimiter : protected uncopyable
+{
+  int m_pos;
+  string m_delim;
+  static string s_empty;
+
+  list_delimiter ();
+
+public:
+  list_delimiter (const char *delim, int init_pos = 0)
+      : m_pos (init_pos), m_delim(delim) {}
+  const string &get ()	{return m_pos++ ? m_delim : s_empty;}
+  void reset () 	{m_pos = 0;}
+  int get_pos ()	{return m_pos;}
+};
+
+string list_delimiter::s_empty = "";
+
+/* Bitmasks for representing non-volatile registers of an ms_abi call that
+   are not already clobbered by a sysv_abi call.  */
+enum optional_regs
+{
+  OPTIONAL_REG_RBX = 0x01,
+  OPTIONAL_REG_RBP = 0x02,
+  OPTIONAL_REG_R12 = 0x04,
+  OPTIONAL_REG_R13 = 0x08,
+  OPTIONAL_REG_R14 = 0x10,
+  OPTIONAL_REG_R15 = 0x20,
+
+  OPTIONAL_REG_ALL = 0x3f,
+  OPTIONAL_REG_HFP_ALL = OPTIONAL_REG_ALL & (~OPTIONAL_REG_RBP)
+};
+
+static const char * const optional_regs_str[] = {
+  "rbx",
+  "rbp",
+  "r12",
+  "r13",
+  "r14",
+  "r15",
+};
+
+/* A simple type & name representation of a function parameter.  */
+class arg
+{
+  string name;
+  string type;
+  bool type_is_integral:1;
+
+public:
+  arg(const char *name, const char *type, bool type_is_integral);
+
+  bool is_type_integral () const	{return type_is_integral;}
+  const string &get_name () const	{return name;}
+  const string &get_type () const	{return type;}
+};
+
+arg::arg(const char *name, const char *type, bool type_is_integral)
+    : name (name), type (type), type_is_integral (type_is_integral)
+{
+}
+
+/* A stupid operator<< implementation for arg objects.  */
+template<class T> T &operator<< (T &out, const arg &a)
+{
+  return out << a.get_type () << " " << a.get_name ();
+}
+
+/* Bitmask representation of all possible variants of a test function.  The
+   value FN_VAR_MSABI is only used internally to distinguish between an
+   ms_abi and sysv_abi function.  */
+enum fn_variants {
+  FN_VAR_MSABI		= 0x01,
+  FN_VAR_HFP		= 0x02,
+  FN_VAR_REALIGN	= 0x04,
+  FN_VAR_ALLOCA		= 0x08,
+  FN_VAR_VARARGS	= 0x10,
+  FN_VAR_SIBCALL	= 0x20,
+  FN_VAR_SHRINK_WRAP	= 0x40,
+
+  FN_VAR_HFP_OR_REALIGN	= FN_VAR_HFP | FN_VAR_REALIGN,
+  FN_VAR_MASK		= 0x7f,
+  FN_VAR_COUNT		= 7
+};
+
+/* Representation of a Microsoft or System V ABI function with varying
+   parameters, quirks and optimization goals.
+
+   Function name nomenclature:
+     (msabi|sysv)_[xx_][r|f][a][v][s][w]<n>
+      |            |    |    |  |  |  |  |
+      |            |    |    |  |  |  |  Number of extra (long) parameters
+      |            |    |    |  |  |  shrink wrap
+      |            |    |    |  |  sibling call
+      |            |    |    |  varargs
+      |            |    |    alloca
+      |            |    Forced realignment or hard frame pointer
+      |            Explicit clobbers (hexadecimal mask, ms_abi only)
+      Calling Convention  */
+class fn : protected uncopyable
+{
+private:
+  const vector<arg> &m_args;
+  string m_name;
+  string m_attr_decl_str;
+  string m_attr_def_str;
+  int m_clobbers:FN_VAR_COUNT;
+  int m_var;
+
+public:
+  fn (const vector<arg> &args, int clobbers, int var);
+
+  void print_params (ostream &out) const;
+  void print_decl (ostream &out, bool for_def = false) const;
+  void print_noinfo_def (ostream &out) const;
+  void print_def (ostream &out) const;
+  const string &get_name () const	{return m_name;}
+  const vector<arg> &get_args () const	{return m_args;}
+
+  bool get_hfp_or_realign () const	{return m_var & FN_VAR_HFP_OR_REALIGN;}
+  bool get_msabi () const		{return m_var & FN_VAR_MSABI;}
+  bool get_hfp () const			{return m_var & FN_VAR_HFP;}
+  bool get_realign () const		{return m_var & FN_VAR_REALIGN;}
+  bool get_alloca () const		{return m_var & FN_VAR_ALLOCA;}
+  bool get_varargs () const		{return m_var & FN_VAR_VARARGS;}
+  bool get_sibcall () const		{return m_var & FN_VAR_SIBCALL;}
+  bool get_shrink_wrap () const		{return m_var & FN_VAR_SHRINK_WRAP;}
+};
+
+fn::fn (const vector<arg> &args, int clobbers, int var)
+    : m_args (args)
+    , m_name ()
+    , m_attr_decl_str ()
+    , m_attr_def_str ("noinline")
+    , m_clobbers (clobbers)
+    , m_var (var)
+{
+  assert (!(var & ~FN_VAR_MASK));
+
+  if (get_hfp () && get_realign ())
+    throw invalid_argument ("`hfp' with `realign' does nothing.");
+
+  if (get_varargs () && args.empty ())
+    throw invalid_argument ("Need at least one normal argument to use varargs");
+
+  assert (!(get_hfp () || get_realign ()) || !(clobbers & OPTIONAL_REG_RBP));
+
+  stringstream name;
+  name << (get_msabi () ? "msabi_" : "sysv_");
+  if (get_msabi ())
+    name << setfill('0') << setw(2) << hex << m_clobbers << "_";
+  name << (get_realign () ? "r" : (get_hfp () ? "f" : ""))
+       << (get_alloca () ? "a" : "")
+       << (get_varargs () ? "v" : "")
+       << (get_sibcall () ? "s" : "")
+       << (get_shrink_wrap () ? "w" : "")
+       << setw(0) << dec << (unsigned)args.size();
+  m_name = name.str();
+
+  list_delimiter decl_comma (", ", !m_attr_decl_str.empty ());
+  list_delimiter def_comma (", ", !m_attr_def_str.empty ());
+  if (get_msabi ())
+    {
+	m_attr_decl_str += decl_comma.get ();
+	m_attr_decl_str += "ms_abi";
+	m_attr_def_str += def_comma.get ();
+	m_attr_def_str += "ms_abi";
+    }
+
+  if (get_realign ())
+    {
+      m_attr_def_str += def_comma.get();
+      m_attr_def_str += "__force_align_arg_pointer__";
+    }
+  else if (get_hfp ())
+    {
+      m_attr_def_str += def_comma.get();
+      m_attr_def_str += "optimize (\"no-omit-frame-pointer\")";
+    }
+}
+
+/* Print the parameters for a function declaration.  */
+void fn::print_params (ostream &out) const
+{
+  list_delimiter comma (", ");
+
+  vector<arg>::const_iterator i;
+  if (get_alloca () && !get_msabi ())
+    out << comma.get () << "void *alloca_mem";
+  for (i = m_args.begin(); i != m_args.end(); ++i)
+    out << comma.get () << *i;
+
+  if (get_varargs ())
+    out << comma.get () << (get_msabi () ? "..." : "va_list argptr");
+}
+
+/* Print the declaration for a function.  */
+void fn::print_decl (ostream &out, bool for_def) const
+{
+  const string &attr_str = (for_def ? m_attr_def_str : m_attr_decl_str);
+  if (!for_def)
+    out << "extern ";
+
+  if (!attr_str.empty ())
+    out << "__attribute__ ((" << attr_str << ")) ";
+
+  out << "long " << m_name << " (";
+  print_params (out);
+  out << ")";
+  if (!for_def)
+    out << ";" << endl;
+}
+
+/* Output a volatile "_noinfo" function pointer definition.  */
+void fn::print_noinfo_def (ostream &out) const
+{
+  out << "static ";
+  if (!m_attr_decl_str.empty ())
+    out << "__attribute__ ((" << m_attr_decl_str << ")) ";
+  out << "long (*const volatile " << m_name << "_noinfo) (";
+  print_params (out);
+  out << ") = " << m_name << ";" << endl;
+}
+
+/* Print the definition of a function.  */
+void fn::print_def (ostream &out) const
+{
+  vector<arg>::const_iterator i;
+
+  print_decl (out, true);
+  out << endl << "{" << endl;
+
+  if (get_msabi () && get_alloca ())
+    {
+      const char *size_str = m_args.empty () ? "42" : "a";
+      out << "  void *alloca_mem = alloca (8 + " << size_str << ");" << endl
+	  << "  *(long*)alloca_mem = FLAG_ALLOCA;" << endl;
+    }
+  if (get_msabi () && get_varargs ())
+    out << "  va_list argptr;" << endl;
+  if (get_shrink_wrap ())
+    out << "  if (shrink_wrap_global == FLAG_SHRINK_WRAP_FAST_PATH)" << endl
+	<< "    return FLAG_SHRINK_WRAP_FAST_PATH;" << endl;
+
+  list_delimiter comma (", ");
+  if (m_clobbers)
+    {
+      out << "  __asm__ __volatile__ (\"\" :::";
+      unsigned c;
+      unsigned mask = m_clobbers;
+      comma.reset ();
+      for (c = 0, mask = m_clobbers; mask; ++c, mask >>= 1)
+	if (mask & 1)
+	  out << comma.get () << "\"" << optional_regs_str[c] << "\"";
+      out << ");" << endl;
+    }
+
+  if (get_msabi () && get_varargs ())
+    {
+      assert (!m_args.empty ());
+      out << "  va_start(argptr, " << m_args.back ().get_name () << ");" << endl;
+    }
+
+  out << "  return ";
+  if (get_msabi ())
+    {
+      if (get_sibcall ())
+	out << "do_sibcall_noinfo (";
+
+      comma.reset ();
+      out << "sysv_"
+	  << (get_alloca () ? "a" : "")
+	  << (get_varargs () ? "v" : "")
+	  << m_args.size ()
+	  << "_noinfo (";
+
+      if (get_alloca ())
+	out << comma.get () << "alloca_mem";
+      for (i = m_args.begin(); i != m_args.end(); ++i)
+	out << comma.get () << i->get_name ();
+      if (get_varargs ())
+	out << comma.get () << "argptr";
+      out << ")";
+      if (get_shrink_wrap ())
+	out << " + FLAG_SHRINK_WRAP_SLOW_PATH";
+      if (get_sibcall ())
+	out << ")";
+    }
+  else
+    {
+      list_delimiter plus (" + ");
+      for (i = m_args.begin(); i != m_args.end(); ++i)
+	  if (i->is_type_integral ())
+	    out << plus.get () << i->get_name ();
+      if (get_alloca ())
+	out << plus.get () << "*(long*)alloca_mem";
+      if (!plus.get_pos ())
+	out << "0";
+    }
+  out << ";" << endl;
+  if (get_msabi () && get_varargs ())
+    out << "  va_end(argptr);" << endl;
+  out << "}" << endl << endl;
+}
+
+/* Global variables.  */
+string argv0;
+string out_file_name;
+unsigned int extra_params_min = 0;
+unsigned int extra_params_max = 5;
+unsigned fn_variant_mask = FN_VAR_MASK;
+bool omit_rbp_clobbers = false;
+vector<class fn*> sysv_funcs;
+vector<class fn*> msabi_funcs;
+
+
+/* Emit externs for do_test_aligned and do_test_unaligned (defined in do-test.S)
+   followed by all of the various do_test* function pointers that
+   are just aliases of them.  */
+static void make_do_tests_decl (const vector<class arg> &args, ostream &out)
+{
+  vector<class arg>::const_iterator ai;
+  unsigned i, varargs, unaligned;
+
+  out << "extern __attribute__ ((ms_abi)) long do_test_aligned ();" << endl
+      << "extern __attribute__ ((ms_abi)) long do_test_unaligned ();" << endl;
+
+  list_delimiter comma (", ");
+  for (i = extra_params_min; i <= args.size (); ++i)
+    for (unaligned = 0; unaligned <= 1; ++unaligned)
+      for (varargs = 0; varargs <= 1; ++varargs)
+	{
+	  if (!i && varargs)  /* skip varargs version when no other args */
+	    continue;
+
+	  comma.reset ();
+	  out << "static __attribute__ ((ms_abi)) long (*const do_test_"
+	      << (unaligned ? "u" : "")
+	      << (varargs ? "v" : "") << i << ") (";
+
+	  unsigned j;
+	  for (j = 0, ai = args.begin (); j < i; ++j, ++ai)
+	    out << comma.get () << ai->get_type () << " "
+		<< ai->get_name ();
+	  if (varargs)
+	    out << comma.get () << "...";
+	  out << ") = (void*)do_test_" << (unaligned ? "un" : "")
+	      << "aligned;" << endl;
+	}
+}
+
+/* Generate do_tests function.  We actually break it up into multiple
+   do_tests_xxxx functions to keep compile times down (with just one large
+   function, it is a very slow build).  */
+void make_do_test (const vector<class arg> &args,
+		   const vector<class fn*> &msabi_funcs,
+		   ostream &out)
+{
+  const unsigned TESTS_PER_FN_MAX = 64;
+  unsigned i;
+  vector<string> do_tests_fn_names;
+  unsigned fn_count = 0;
+  unsigned test_count = TESTS_PER_FN_MAX;
+  string params_str;
+  string param_names_str;
+  string param_types_str;
+
+  /* Init some commonly used strings.  */
+  {
+    stringstream s1, s2, s3;
+    list_delimiter comma(", ");
+    for (auto arg : args)
+      {
+	const string &c = comma.get ();
+	s1 << c << arg;
+	s2 << c << arg.get_name ();
+	s3 << c << arg.get_type ();
+      }
+    params_str = s1.str ();
+    param_names_str = s2.str ();
+    param_types_str = s3.str ();
+  }
+
+  vector<class fn*>::const_iterator fi;
+  for (fi = msabi_funcs.begin(); fi != msabi_funcs.end(); ++fi)
+    {
+      const fn &f = **fi;
+      unsigned unaligned, shrink_wrap;
+
+      for (unaligned = 0; unaligned <= !!f.get_realign (); ++unaligned)
+	for (shrink_wrap = 0; shrink_wrap <= !!f.get_shrink_wrap ();
+	     ++shrink_wrap)
+	  {
+	    const vector<class arg> &fargs = f.get_args ();
+
+	    /* To prevent unwieldy build times, we split up tests to 64-ish per
+	       function.  */
+	    if (++test_count > TESTS_PER_FN_MAX)
+	      {
+		test_count = 1;
+		if (fn_count > 0) {
+		  out << "}" << endl << endl;
+		}
+
+		stringstream fn_name;
+		fn_name << "do_tests_" << setfill('0') << setw(4) << hex
+		     << fn_count++;
+		do_tests_fn_names.push_back (fn_name.str ());
+
+		out << "static __attribute__((noinline)) void "
+		    << fn_name.str () << " (" << params_str << ")" << endl
+		    << "{" << endl
+		    << "  long ret;" << endl;
+	      }
+
+	    /* Call init_test.  */
+	    out << endl
+		<< "  init_test (" << f.get_name () << ", \""
+		<< f.get_name () << "\", ";
+
+	    if (f.get_realign ())
+	      out << (unaligned ? "ALIGNMENT_MISALIGNED"
+				: "ALIGNMENT_ALIGNED");
+	    else
+	      out << "ALIGNMENT_NOT_TESTED";
+
+	    out << ", ";
+	    if (f.get_shrink_wrap ())
+	      out << (shrink_wrap ? "SHRINK_WRAP_SLOW_PATH"
+				  : "SHRINK_WRAP_FAST_PATH");
+	    else
+	      out << "SHRINK_WRAP_NONE";
+	    out << ", ";
+
+	    /* Calculate the expected return value.  */
+	    if (f.get_shrink_wrap () && shrink_wrap == 0)
+	      out << "FLAG_SHRINK_WRAP_FAST_PATH";
+	    else
+	      {
+		list_delimiter plus (" + ");
+		for (auto const &arg : fargs)
+		  out << plus.get () << arg.get_name ();
+		if (f.get_sibcall ())
+		  out << plus.get () << "FLAG_SIBCALL";
+		if (f.get_alloca ())
+		  out << plus.get () << "FLAG_ALLOCA";
+		if (f.get_shrink_wrap () && shrink_wrap == 1)
+		  out << plus.get () << "FLAG_SHRINK_WRAP_SLOW_PATH";
+		if (!plus.get_pos ())
+		  out << "0";
+	      }
+	    out << ");" << endl;
+	    /* End of init_test call.  */
+
+	    if (f.get_realign () && unaligned == 1)
+	      out << "  __asm__ __volatile__ (\"subq $8,%%rsp\":::\"cc\");"
+		  << endl;
+
+	    out << "  ret = do_test_"
+		<< (f.get_realign () && unaligned == 1 ? "u" : "")
+		<< (f.get_varargs () ? "v" : "")
+		<< fargs.size () << " (";
+
+	    list_delimiter comma (", ");
+	    for (auto const &arg : fargs)
+	      out << comma.get () << arg.get_name ();
+	    out << ");" << endl;
+
+	    if (f.get_realign () && unaligned == 1)
+	      out << "  __asm__ __volatile__ (\"addq $8,%%rsp\":::\"cc\");"
+		  << endl;
+
+	    out << "  check_results (ret);" << endl;
+	  }
+    }
+
+  /* Close the last do_tests_* function.  */
+  out << "}" << endl << endl;
+
+  /* Define _noinfo pointers to each do_tests_* function.  */
+  for (auto const &fn_name : do_tests_fn_names)
+    out << "  static void (*volatile " << fn_name << "_noinfo) ("
+	<< param_types_str << ") = " << fn_name << ";" << endl;
+
+  /* Define main do_tests () function.  */
+  out << endl
+      << "void do_tests ()" << endl
+      << "{" << endl;
+  i = 1;
+  for (auto const &arg : args)
+    {
+      out << "  " << arg.get_type () << " " << arg.get_name () << " = " << i
+	  << ";" << endl;
+      i <<= 1;
+    }
+  out << endl;
+
+  /* Call do_tests_*_noinfo functions.  */
+  for (auto const &fn_name : do_tests_fn_names)
+    out << "  " << fn_name << "_noinfo (" << param_names_str << ");" << endl;
+  out << "}" << endl << endl;
+}
+
+/* Generate output file.  */
+void generate_header (const string &args)
+{
+  vector<class arg> all_args;
+  vector<vector<class arg> > arg_sets;
+
+  ofstream out;
+  out.exceptions (ios::failbit | ios::badbit);
+  out.open (out_file_name);
+  out << "/* Generated with " << args << " */" << endl << endl;
+
+  assert (extra_params_max < 26);
+
+  /* Build the extra argument array.  */
+  for (unsigned int i = 0; i < extra_params_max; ++i)
+    {
+      char name[2] = "a";
+      name[0] += i;
+      class arg myarg (name, "long", true);
+
+      all_args.push_back (myarg);
+    }
+
+  arg_sets.resize (extra_params_max - extra_params_min + 1);
+  for (unsigned int i = 0; i < arg_sets.size (); ++i)
+      arg_sets[i].insert (arg_sets[i].end(), all_args.begin(),
+			  all_args.begin () + i + extra_params_min);
+
+  /* Print sysv functions */
+  for (const vector<class arg> &as : arg_sets)
+    {
+      const int alloca_max = !!(fn_variant_mask & FN_VAR_MSABI);
+      const int varargs_max = !!(fn_variant_mask & FN_VAR_VARARGS);
+      fn *fn;
+      for (int _alloca = 0; _alloca <= alloca_max; ++_alloca)
+	for (int varargs = 0; varargs <= varargs_max; ++varargs)
+	{
+	  try {
+	    int var = (_alloca ? FN_VAR_ALLOCA : 0)
+		    | (varargs ? FN_VAR_VARARGS : 0);
+	    fn = new ::fn (as, 0, var);
+	  } catch (invalid_argument) {
+	    continue;
+	  }
+	  sysv_funcs.push_back (fn);
+	  fn->print_def (out);
+	}
+    }
+
+  /* Print _noinfo function pointers for sysv functions.  */
+  for (const fn *f : sysv_funcs)
+    f->print_noinfo_def (out);
+
+  /* Print ms_abi functions.  */
+  unsigned int var;
+  for (var = 0; var <= FN_VAR_MASK; ++var)
+    {
+      /* We only want ms_abi fns for this.  */
+      if (! (var & FN_VAR_MSABI))
+	continue;
+
+      /* Skip variants excluded by the requested variant mask.  */
+      if ((var & fn_variant_mask) != var)
+	continue;
+
+      unsigned clobbers;
+      for (clobbers = 0; clobbers <= OPTIONAL_REG_ALL; ++clobbers)
+	{
+	  /* Skip clobbers that would be invalid.  */
+	  if (clobbers & OPTIONAL_REG_RBP)
+	    {
+	      /* Whole program built with hard frame pointer.  */
+	      if (omit_rbp_clobbers)
+		continue;
+
+	      /* Uses BP explicitly.  */
+	      if (var & FN_VAR_HFP_OR_REALIGN)
+		continue;
+
+	      /* Alloca seems to require DRAP, which uses BP.  */
+	      if (var & FN_VAR_ALLOCA)
+		continue;
+	    }
+
+	  for (auto const &as : arg_sets)
+	    {
+	      fn *fn;
+	      try {
+		fn = new ::fn (as, clobbers, var);
+	      } catch (invalid_argument) {
+		continue;
+	      }
+
+	      msabi_funcs.push_back (fn);
+	      fn->print_def (out);
+	    }
+	}
+    }
+
+  out << endl;
+  make_do_tests_decl (all_args, out);
+  out << endl;
+
+  make_do_test (all_args, msabi_funcs, out);
+  out.close ();
+}
+
+/* Parse a string into a long.  Return true (nonzero) on failure.  */
+static bool long_optarg (const char *optarg, long &dest)
+{
+  char *end;
+
+  errno = 0;
+  dest = strtol(optarg, &end, 0);
+  if (errno)
+    cerr << strerror(errno) << endl;
+
+  while (isspace(*end))
+    ++end;
+
+  /* Error if errno non-zero or junk at end of string.  */
+  return errno || *end;
+}
+
+void usage ()
+{
+  cerr
+<< "Usage: " << argv0 << " [options] <output_file>" << endl
+<< endl
+<< "    -p <n|n-n>, --extra-params <n|n-n>" << endl
+<< "        A single or range of extra parameters" << endl
+<< "        Examples:" << endl
+<< "            -p0-5" << endl
+<< "            -p12" << endl
+<< endl
+<< "    -v <n>, --variant-mask <n>" << endl
+<< "        Set mask of test variants (see enum fn_variants for values," << endl
+<< "        defaults to 0x" << hex << FN_VAR_MASK << " [FN_VAR_MASK])" << endl
+<< endl
+<< "    -0, --omit-rbp-clobbers" << endl
+<< "        Omit tests that clobber RBP." << endl;
+  exit (-1);
+}
+
+/* Parse a string representing a single number or a number range (min-max).  */
+void set_extra_param_counts (const char *str)
+{
+  char copy[0x40];
+  char *max_str;
+  bool bad = false;
+  long int min, max;
+
+  strncpy (copy, str, sizeof (copy) - 1);
+  max_str = strchr(copy, '-');
+  if (max_str)
+      *max_str++ = 0;
+
+  bad = long_optarg (copy, min);
+  if (max_str)
+    bad = bad || long_optarg (max_str, max);
+  else
+    max = min;
+
+  if (bad || min > max)
+    usage ();
+
+  extra_params_min = min;
+  extra_params_max = max;
+}
+
+int main (int argc, char *argv[])
+{
+  argv0 = argv[0];
+  const char *short_options = "p:v:0";
+  const struct option long_options[] = {
+    {"extra-params",		required_argument, 0, 'p'},
+    {"variant-mask",		required_argument, 0, 'v'},
+    {"omit-rbp-clobbers",	no_argument,	   0, '0'},
+    {"help",			no_argument,	   0, 'h'},
+    {0, 0, 0, 0},
+  };
+
+  int option_index = 0;
+  int c;
+  while ((c = getopt_long (argc, argv, short_options, long_options,
+			   &option_index)) != -1)
+    {
+      switch (c)
+	{
+	long l;
+
+	case 'p':
+	  set_extra_param_counts (optarg);
+	  break;
+
+	case 'v':
+	  if (long_optarg (optarg, l) || (l & ~FN_VAR_MASK))
+	  {
+	    cerr << "ERROR: Bad value for -v: `" << optarg <<  "`" << endl;
+	    usage ();
+	  }
+	  fn_variant_mask = (unsigned)l;
+	  break;
+
+	case '0':
+	  omit_rbp_clobbers = true;
+	  break;
+
+	case 'h':
+	default:
+	  usage ();
+	}
+    }
+
+  if (argc - optind != 1)
+    usage ();
+  out_file_name = argv[optind];
+
+  /* Can't skip msabi functions.  */
+  fn_variant_mask |= FN_VAR_MSABI;
+
+  /* If whole program has HFP, explicit tests that enable it are redundant.  */
+  if (omit_rbp_clobbers)
+    fn_variant_mask &= ~FN_VAR_HFP;
+
+  stringstream argv_str;
+
+  for (int i = 0; i < argc; ++i)
+    argv_str << (i ? " " : "") << argv[i];
+
+  int ret = 0;
+  try
+    {
+      generate_header (argv_str.str());
+    }
+  catch (exception &e)
+    {
+      cerr << "ERROR: While writing `" << out_file_name << "': "
+	   << strerror(errno) << endl;
+      ret = 1;
+    }
+  for_each (sysv_funcs.begin (), sysv_funcs.end (), default_delete<fn> ());
+  for_each (msabi_funcs.begin (), msabi_funcs.end (), default_delete<fn> ());
+
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.c b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.c
new file mode 100644
index 00000000000..2a011f5103d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.c
@@ -0,0 +1,373 @@
+/* Test program for 64-Bit Microsoft to System V function calls.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   Contributed by Daniel Santos <daniel.santos@pobox.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* This is a single-threaded test program for Microsoft 64-bit ABI functions.
+   It is aimed at verifying correctness of pro/epilogues of ms_abi functions
+   that call sysv_abi functions to assure clobbered registers are properly
+   saved and restored and attempt to detect any flaws in the behavior of these
+   functions.  The following variants are tested:
+
+   * Either uses hard frame pointer, re-aligns the stack or neither,
+   * Uses alloca (and thus DRAP) or not,
+   * Uses sibling call optimization or not,
+   * Uses variable argument list or not, and
+   * Has shrink-wrapped code or not.
+
+  In addition, an ms_abi function is generated for each of these combinations,
+  clobbering each unique combination of additional registers (excluding BP when
+  a frame pointer is used). Shrink-wrap variants are called in a way that
+  both the fast and slow path are used. Re-aligned variants are called with
+  an aligned and mis-aligned stack.
+
+  Each ms_abi function is called via an assembly stub that first saves all
+  volatile registers and fills them with random values. The ms_abi function
+  is then called.  After the function returns, the value of all volatile
+  registers is verified against the random data and then restored.  */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <alloca.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h>
+
+#ifndef __x86_64__
+# error Test only valid on x86_64
+#endif
+
+enum reg_data_sets
+{
+  REG_SET_SAVE,
+  REG_SET_INPUT,
+  REG_SET_OUTPUT,
+
+  REG_SET_COUNT
+};
+
+enum flags
+{
+  FLAG_ALLOCA			= 0x01000000,
+  FLAG_SIBCALL			= 0x02000000,
+  FLAG_SHRINK_WRAP_FAST_PATH	= 0x08000000,
+  FLAG_SHRINK_WRAP_SLOW_PATH	= 0x0c000000,
+};
+
+enum alignment_option
+{
+  ALIGNMENT_NOT_TESTED,
+  ALIGNMENT_ALIGNED,
+  ALIGNMENT_MISALIGNED,
+
+  ALIGNMENT_COUNT,
+};
+
+enum shrink_wrap_option
+{
+  SHRINK_WRAP_NONE,
+  SHRINK_WRAP_FAST_PATH,
+  SHRINK_WRAP_SLOW_PATH,
+
+  SHRINK_WRAP_COUNT
+};
+
+union regdata {
+  struct {
+    __uint128_t	sseregs[10];
+    union {
+      uint64_t	intregs[8];
+      struct {
+	uint64_t	rsi;
+	uint64_t	rdi;
+	uint64_t	rbx;
+	uint64_t	rbp;
+	uint64_t	r12;
+	uint64_t	r13;
+	uint64_t	r14;
+	uint64_t	r15;
+      };
+    };
+  };
+  uint32_t		u32_arr[56];
+} __attribute__((aligned (16)));
+
+struct test_data
+{
+  union regdata regdata[REG_SET_COUNT];
+  void *fn;
+  void *retaddr;
+  const char *name;
+  enum alignment_option alignment;
+  enum shrink_wrap_option shrink_wrap;
+  long ret_expected;
+} test_data;
+
+static int shrink_wrap_global;
+static void __attribute((sysv_abi)) do_tests ();
+static void init_test (void *fn, const char *name,
+		       enum alignment_option alignment,
+		       enum shrink_wrap_option shrink_wrap, long ret_expected);
+static void check_results (long ret);
+static __attribute__((ms_abi)) long do_sibcall (long arg);
+static __attribute__((ms_abi)) long
+(*const volatile do_sibcall_noinfo) (long) = do_sibcall;
+
+/* Defines do_tests ().  */
+#include "ms-sysv-generated.h"
+
+static int arbitrarily_fail;
+static const char *argv0;
+
+static void __attribute__((noinline))
+init_test (void *fn, const char *name, enum alignment_option alignment,
+	   enum shrink_wrap_option shrink_wrap, long ret_expected)
+{
+  int i;
+  union regdata *data = &test_data.regdata[REG_SET_INPUT];
+
+  assert (alignment < ALIGNMENT_COUNT);
+  assert (shrink_wrap < SHRINK_WRAP_COUNT);
+
+  memset (&test_data, 0, sizeof (test_data));
+  for (i = 55; i >= 0; --i)
+    data->u32_arr[i] = (uint32_t)lrand48 ();
+  test_data.fn = fn;
+  test_data.name = name;
+  test_data.alignment = alignment;
+  test_data.shrink_wrap = shrink_wrap;
+  test_data.ret_expected = ret_expected;
+
+  switch (shrink_wrap)
+  {
+    case SHRINK_WRAP_NONE:
+    case SHRINK_WRAP_COUNT:
+      break;
+    case SHRINK_WRAP_FAST_PATH:
+      shrink_wrap_global = FLAG_SHRINK_WRAP_FAST_PATH;
+      break;
+    case SHRINK_WRAP_SLOW_PATH:
+      shrink_wrap_global = FLAG_SHRINK_WRAP_SLOW_PATH;
+      break;
+  }
+}
+
+static const char *alignment_str[ALIGNMENT_COUNT] =
+{
+  "", "aligned", "misaligned"
+};
+
+static const char *shrink_wrap_str[SHRINK_WRAP_COUNT] =
+{
+  "", "shrink-wrap fast path", "shrink-wrap slow path"
+};
+
+static const char *test_descr ()
+{
+  static char buffer[0x400];
+
+  if (test_data.alignment || test_data.shrink_wrap)
+    snprintf (buffer, sizeof (buffer) - 1, "`%s' (%s%s%s)",
+	      test_data.name,
+	      alignment_str[test_data.alignment],
+	      (test_data.alignment && test_data.shrink_wrap ? ", " : ""),
+	      shrink_wrap_str[test_data.shrink_wrap]);
+  else
+    snprintf (buffer, sizeof (buffer) - 1, "`%s'", test_data.name);
+
+  return buffer;
+}
+
+static const char *regnames[] = {
+  "XMM6",
+  "XMM7",
+  "XMM8",
+  "XMM9",
+  "XMM10",
+  "XMM11",
+  "XMM12",
+  "XMM13",
+  "XMM14",
+  "XMM15",
+  "RSI",
+  "RDI",
+  "RBX",
+  "RBP",
+  "R12",
+  "R13",
+  "R14",
+  "R15",
+};
+
+static void print_header (int *header_printed)
+{
+  if (!*header_printed)
+    fprintf (stderr, "       %-35s    %-35s\n", "Expected", "Got");
+  *header_printed = 1;
+}
+
+static int compare_reg128 (const __uint128_t *a, const __uint128_t *b,
+			   const char *name, int *header_printed)
+{
+  if (!memcmp (a, b, sizeof (*a)))
+    return 0;
+  else
+    {
+      long ha = *((long*)a);
+      long la = *((long*)a + 1);
+      long hb = *((long*)b);
+      long lb = *((long*)b + 1);
+      print_header (header_printed);
+      fprintf (stderr, "%-5s: 0x%016lx %016lx != 0x%016lx %016lx\n",
+	       name, ha, la, hb, lb);
+      return 1;
+    }
+}
+
+static int compare_reg64 (long a, long b, const char *name,
+			  int *header_printed)
+{
+  if (a == b)
+    return 0;
+  else
+    {
+      print_header (header_printed);
+      fprintf (stderr, "%s: 0x%016lx != 0x%016lx\n", name, a, b);
+      return 1;
+    }
+}
+
+
+static void __attribute__((noinline)) check_results (long ret)
+{
+  unsigned i;
+  unsigned bad = 0;
+  int header_printed = 0;
+
+  union regdata *a = &test_data.regdata[REG_SET_INPUT];
+  union regdata *b = &test_data.regdata[REG_SET_OUTPUT];
+
+  a = __builtin_assume_aligned(a, 16);
+  b = __builtin_assume_aligned(b, 16);
+
+  if (arbitrarily_fail) {
+    uint64_t u64 = lrand48 ();
+    if (u64 % 100 == 0)
+      b->u32_arr[u64 % 56] = 0xfdfdfdfd;
+  }
+
+  for (i = 0; i < 10; ++i)
+    bad |= compare_reg128 (&a->sseregs[i], &b->sseregs[i], regnames[i],
+			   &header_printed);
+
+  for (i = 0; i < 8; ++i)
+    bad |= compare_reg64 (a->intregs[i], b->intregs[i], regnames[i + 10],
+			  &header_printed);
+
+  if (ret != test_data.ret_expected)
+    {
+      fprintf (stderr, "Wrong return value: got 0x%016lx, expected 0x%016lx\n",
+	       ret, test_data.ret_expected);
+      bad = 1;
+    }
+
+  if (bad)
+    {
+      fprintf (stderr, "Failed on test function %s\n", test_descr ());
+      raise (SIGTRAP);
+      exit (-1);
+    }
+}
+
+static __attribute__((ms_abi, noinline)) long do_sibcall (long arg) {
+  return arg + FLAG_SIBCALL;
+}
+
+void usage ()
+{
+  fprintf (stderr, "Usage: %s [-s <seed>] [-f]\n", argv0);
+  exit (-1);
+}
+
+static long long_optarg (const char *optarg, const char *optstr)
+{
+  char *end;
+  long ret;
+
+  errno = 0;
+  ret = strtol(optarg, &end, 0);
+
+  while (isspace (*end))
+    ++end;
+
+  if (errno || *end)
+    {
+      fprintf (stderr, "ERROR: Bad value for %s: `%s`\n", optstr, optarg);
+      if (errno)
+	fprintf (stderr, "%s\n", strerror (errno));
+      exit (-1);
+    }
+
+  return ret;
+}
+
+int main (int argc, char *argv[])
+{
+  long seed = 0;
+  int c;
+  argv0 = argv[0];
+
+  assert (!((long)&test_data.regdata[REG_SET_SAVE] & 15));
+  assert (!((long)&test_data.regdata[REG_SET_INPUT] & 15));
+  assert (!((long)&test_data.regdata[REG_SET_OUTPUT] & 15));
+
+  while ((c = getopt (argc, argv, "s:f")) != -1)
+    {
+      switch (c)
+	{
+	case 's':
+	  seed = long_optarg (optarg, "-s");
+	  break;
+
+	case 'f':
+	  arbitrarily_fail = 1;
+	  fprintf (stderr, "NOTE: Arbitrary failure enabled (-f).\n");
+	  break;
+	}
+    }
+
+  srand48 (seed);
+  do_tests ();
+
+  /* Just in case we don't have enough tests to randomly trigger the
+     failure.  */
+  if (arbitrarily_fail)
+    return -1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp
new file mode 100644
index 00000000000..e317af9bd85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp
@@ -0,0 +1,178 @@
+# Tests for ms_abi to sysv_abi calls.
+# Copyright (C) 2016-2017 Free Software Foundation, Inc.
+# Contributed by Daniel Santos <daniel.santos@pobox.com>
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Exit immediately if this isn't a native x86_64 target.
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+     || ![is-effective-target lp64] || ![isnative] } then {
+    unsupported "$subdir"
+    return
+}
+
+global GCC_RUNTEST_PARALLELIZE_DIR
+
+load_lib gcc-dg.exp
+
+proc runtest_ms_sysv { cflags generator_args } {
+    global GCC_UNDER_TEST HOSTCXX HOSTCXXFLAGS tmpdir srcdir subdir \
+	   parallel_dir next_test
+
+    set objdir "$tmpdir/ms-sysv"
+    set generator "$tmpdir/ms-sysv-generate.exe"
+    set generated_header "$objdir/ms-sysv-generated.h"
+    set do_test_o "$objdir/do-test.o"
+    set ms_sysv_o "$objdir/ms-sysv.o"
+    set ms_sysv_exe "$objdir/ms-sysv.exe"
+    set status 0
+    set warn_flags "-Wall"
+    set this_test $next_test
+    incr next_test
+
+    # Do parallelization here
+    if [catch {set fd [open "$parallel_dir/$this_test" \
+			    [list RDWR CREAT EXCL]]} ] {
+	if { [lindex $::errorCode 1] eq "EEXIST" } then {
+	    # Another job is running this test
+	    return
+	} else {
+	    error "Failed to open $parallel_dir/$this_test: $::errorCode"
+	    set status 1
+	}
+    } else {
+      close $fd
+    }
+
+    # Detect when hard frame pointers are enabled (or required) so we know not
+    # to generate bp clobbers.
+    if [regexp "^(.+ +| *)-(O0|fno-omit-frame-pointer|p|pg)( +.*)?$" \
+	       $cflags match] then {
+	set generator_args "$generator_args --omit-rbp-clobbers"
+    }
+
+    set descr "$subdir CFLAGS=\"$cflags\" generator_args=\"$generator_args\""
+    verbose "$tmpdir: Running test $descr" 1
+
+    # Cleanup any previous test in objdir
+    file delete -force $objdir
+    file mkdir $objdir
+
+    # Build the generator (only needs to be done once).
+    set src "$srcdir/$subdir/gen.cc"
+    if { $status == 0 } then {
+	if { (![file exists "$generator"]) || ([file mtime "$generator"]
+					    < [file mtime "$src"]) } {
+	    # Temporarily switch to the environment for the host compiler.
+	    restore_ld_library_path_env_vars
+	    set cxx "$HOSTCXX $HOSTCXXFLAGS $warn_flags -std=c++11"
+	    set status [remote_exec host "$cxx -o $generator $src"]
+	    set status [lindex $status 0]
+	    set_ld_library_path_env_vars
+	    if { $status != 0 } then {
+		warning "Could not build $subdir generator"
+	    }
+	}
+    }
+
+    # Generate header
+    if { $status == 0 } then {
+	set status [remote_exec host "$generator $generator_args $generated_header"]
+	set status [lindex $status 0]
+	if { $status != 0 } then {
+	    warning "Could not generate $generated_header"
+	}
+    }
+
+    set cc "$GCC_UNDER_TEST -I$objdir -I$srcdir/$subdir $cflags $warn_flags"
+
+    # Assemble do-test.S
+    set src "$srcdir/$subdir/do-test.S"
+    if { $status == 0 } then {
+	set status [remote_exec build "$cc -c -o $do_test_o $src"]
+	set status [lindex $status 0]
+	if { $status != 0 } then {
+	    warning "Could not assemble $src"
+	}
+    }
+
+    # Build ms-sysv.c
+    set src "$srcdir/$subdir/ms-sysv.c"
+    if { $status == 0 } then {
+	set status [remote_exec build "$cc -c -o $ms_sysv_o $src" "" "" "" 1200]
+	set status [lindex $status 0]
+	if { $status != 0 } then {
+	    warning "Could not build $src."
+	}
+    }
+
+    # Link
+    if { $status == 0 } then {
+	set status [remote_exec build "$cc -o $ms_sysv_exe $ms_sysv_o $do_test_o"]
+	set status [lindex $status 0]
+	if { $status != 0 } then {
+	    warning "Link failed."
+	}
+    }
+
+    # Execute
+    if { $status == 0 } then {
+	set status [remote_exec build "$ms_sysv_exe"]
+	set status [lindex $status 0]
+    }
+
+    if { $status != 0 } then {
+	fail $descr
+    } else {
+	pass $descr
+    }
+}
+
+dg-init
+
+# Setup parallelization
+set next_test 0
+set parallel_dir "$env(GCC_RUNTEST_PARALLELIZE_DIR)/abi-ms-sysv"
+file mkdir "$env(GCC_RUNTEST_PARALLELIZE_DIR)"
+file mkdir "$parallel_dir"
+
+if { ![file isdirectory "$parallel_dir"] } then {
+    error "Failed to create directory $parallel_dir: $::errorCode"
+    return
+}
+
+set gen_opts "-p0-5"
+set all_options [list "-O2" "-O0 -g3"]
+
+# Run without -mcall-ms2sysv-xlogues always
+foreach opt $all_options {
+    runtest_ms_sysv "$opt" "$gen_opts"
+}
+
+# Skip -mcall-ms2sysv-xlogues on Windows (not supported)
+if { ![istarget *-*-cygwin*] && ![istarget *-*-mingw*] } {
+    foreach opt $all_options {
+	runtest_ms_sysv "-mcall-ms2sysv-xlogues $opt" "$gen_opts"
+    }
+}
+
+dg-finish
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
  2017-04-27  8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
@ 2017-04-27  8:05 ` Daniel Santos
  2017-05-04 22:11   ` [PATCH 11/12 rev1] " Daniel Santos
  2017-04-27  8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
                   ` (12 subsequent siblings)
  14 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:05 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add functions emit_outlined_ms2sysv_save and
emit_outlined_ms2sysv_restore, which are called from
ix86_expand_prologue and ix86_expand_epilogue, respectively.  Also add
the code to ix86_expand_call that enables the optimization (setting the
machine_function's outline_ms_sysv field).
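
As a rough illustration (not part of the patch), the optimization applies
to functions like the one below when compiled with -mcall-ms2sysv-xlogues;
the inline saves and restores of the MS-ABI-only call-saved registers are
then replaced by calls or jumps to the out-of-line stubs in libgcc:

  /* An ms_abi function calling a sysv_abi function clobbers RSI, RDI and
     XMM6-XMM15, so its prologue/epilogue is a candidate for outlining.
     sysv_callee is a placeholder for any System V ABI function.  */
  extern long sysv_callee (long x);

  __attribute__ ((ms_abi)) long ms_caller (long x)
  {
    return sysv_callee (x) + 1;
  }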

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 281 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 272 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index debfe457d97..6a4e6f8e728 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14271,6 +14271,79 @@ ix86_elim_entry_set_got (rtx reg)
     }
 }
 
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx addr, mem;
+
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
+  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
+}
+
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+static void
+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  rtvec v = rtvec_alloc (ncregs - 1 + 3);
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, addr;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset () + m->fs.sp_offset;
+  HOST_WIDE_INT stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+  HOST_WIDE_INT stack_align_off_in = xlogue.get_stack_align_off_in ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert (stack_align_off_in == (m->fs.sp_offset & UNITS_PER_WORD));
+
+  /* Get the stub symbol.  */
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+						  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+  RTVEC_ELT (v, vi++) = const0_rtx;
+
+  /* Setup RAX as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  addr = choose_baseaddr (rax_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  insn = emit_insn (gen_rtx_SET (rax, addr));
+
+  gcc_assert (stack_alloc_size >= xlogue.get_stack_space_used ());
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			     GEN_INT (-stack_alloc_size), -1,
+			     m->fs.cfa_reg == stack_pointer_rtx);
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
+			     r.regno);
+      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
+    }
+
+  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -14518,7 +14591,7 @@ ix86_expand_prologue (void)
 	 performing the actual alignment.  Otherwise we cannot guarantee
 	 that there's enough storage above the realignment point.  */
       allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
-      if (allocate)
+      if (allocate && !m->call_ms2sysv)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (-allocate), -1, false);
 
@@ -14526,7 +14599,6 @@ ix86_expand_prologue (void)
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
 					stack_pointer_rtx,
 					GEN_INT (-align_bytes)));
-
       /* For the purposes of register save area addressing, the stack
 	 pointer can no longer be used to access anything in the frame
 	 below m->fs.sp_realigned_offset and the frame pointer cannot be
@@ -14543,6 +14615,9 @@ ix86_expand_prologue (void)
 	m->fs.sp_valid = false;
     }
 
+  if (m->call_ms2sysv)
+    ix86_emit_outlined_ms2sysv_save (frame);
+
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
 
   if (flag_stack_usage_info)
@@ -14863,17 +14938,19 @@ ix86_emit_restore_regs_using_pop (void)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
-/* Emit code and notes for the LEAVE instruction.  */
+/* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
+   omit the emit and only attach the notes.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
 {
   struct machine_function *m = cfun->machine;
-  rtx_insn *insn = emit_insn (ix86_gen_leave ());
+  if (!insn)
+    insn = emit_insn (ix86_gen_leave ());
 
   ix86_add_queued_cfa_restore_notes (insn);
 
@@ -14967,6 +15044,157 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       }
 }
 
+static void
+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
+				  bool use_call, int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  unsigned elems_needed = ncregs + 1;
+  rtvec v;
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
+  enum xlogue_stub stub;
+
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+  /* If using a realigned stack, we should never start with padding.  */
+  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
+
+  /* Setup RSI as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  tmp = choose_baseaddr (rsi_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  /* Get a symbol for the stub.  */
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE
+		    : XLOGUE_STUB_RESTORE_TAIL;
+  sym = xlogue.get_stub_rtx (stub);
+
+  if (!use_call)
+    elems_needed += frame_pointer_needed ? 2 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* We call the epilogue stub when we need to pop incoming args or we are
+     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
+     epilogue stub and it is the tail-call.  */
+  if (use_call)
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, vi++) = ret_rtx;
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+      if (!frame_pointer_needed)
+	{
+	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
+	  gcc_assert (!m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+	  gcc_assert (m->fs.sp_valid);
+
+	  r10 = gen_rtx_REG (DImode, R10_REG);
+	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
+	  emit_insn (gen_rtx_SET (r10, tmp));
+	  RTVEC_ELT (v, vi++) = const0_rtx;
+	}
+      else
+	{
+	  gcc_assert (m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+	  RTVEC_ELT (v, vi++) = const1_rtx;
+	}
+    }
+
+  /* Generate frame load insns and restore notes.  */
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg, frame_load;
+
+      reg = gen_rtx_REG (mode, r.regno);
+      frame_load = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add last.  */
+      if (r.regno == SI_REG)
+	{
+	  gcc_assert (!rsi_frame_load);
+	  rsi_frame_load = frame_load;
+	  rsi_restore_offset = r.offset;
+	}
+      else
+	{
+	  RTVEC_ELT (v, vi++) = frame_load;
+	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
+	}
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
+  RTVEC_ELT (v, vi++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+			     rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+      RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
+
+      /* At this point, R10 should point to frame.stack_realign_offset.  */
+      if (m->fs.cfa_reg == stack_pointer_rtx)
+	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
+      m->fs.sp_offset = frame.stack_realign_offset;
+    }
+
+  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+      insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+	ix86_emit_leave (insn);
+      else
+	{
+	  /* Need CFA adjust note.  */
+	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
+	}
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  /* If we're not doing a tail-call, we need to adjust the stack.  */
+  if (use_call && m->fs.sp_valid)
+    {
+      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				GEN_INT (dealloc), style,
+				m->fs.cfa_reg == stack_pointer_rtx);
+    }
+}
+
 /* Restore function stack, frame, and registers.  */
 
 void
@@ -14977,6 +15205,7 @@ ix86_expand_epilogue (int style)
   struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
+  bool restore_stub_is_tail = false;
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
@@ -15079,7 +15308,37 @@ ix86_expand_epilogue (int style)
     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
 					  style == 2);
 
-  if (restore_regs_via_mov)
+  if (m->call_ms2sysv)
+    {
+      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
+
+      /* We cannot use a tail-call for the stub if:
+	 1. We have to pop incoming args,
+	 2. We have additional int regs to restore,
+	 3. A sibling call will be the tail-call, or
+	 4. We are emitting an eh_return_internal epilogue.
+
+	 TODO: Item 4 has not yet been tested!
+
+	 If any of the above are true, we will call the stub rather than
+	 jump to it.  */
+      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
+      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
+    }
+
+  /* If using an out-of-line stub that is the tail-call, then...  */
+  if (m->call_ms2sysv && restore_stub_is_tail)
+    {
+      /* TODO: paranoid tests. (remove)  */
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+      gcc_assert (!m->fs.fp_valid);
+      gcc_assert (!m->fs.realigned);
+      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
+      gcc_assert (!crtl->drap_reg);
+      gcc_assert (!frame.nregs);
+    }
+  else if (restore_regs_via_mov)
     {
       rtx t;
 
@@ -15210,7 +15469,7 @@ ix86_expand_epilogue (int style)
       else if (TARGET_USE_LEAVE
 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	ix86_emit_leave ();
+	ix86_emit_leave (NULL);
       else
         {
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -15321,7 +15580,7 @@ ix86_expand_epilogue (int style)
       else
 	emit_jump_insn (gen_simple_return_pop_internal (popc));
     }
-  else
+  else if (!m->call_ms2sysv || !restore_stub_is_tail)
     emit_jump_insn (gen_simple_return_internal ());
 
   /* Restore the state back to the state from the prologue,
@@ -29054,6 +29313,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 
 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
 	}
+
+      /* Set here, but it may get cleared later.  */
+      if (TARGET_CALL_MS2SYSV_XLOGUES)
+	cfun->machine->call_ms2sysv = true;
     }
 
   if (vec_len > 1)
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 04/12] [i386] Minor refactoring
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (8 preceding siblings ...)
  2017-04-27  8:05 ` [PATCH 01/12] [i386] Re-align stack frame prior to SSE saves Daniel Santos
@ 2017-04-27  8:23 ` Daniel Santos
  2017-04-27  8:44 ` [PATCH 07/12] [i386] Modify ix86_save_reg to optionally omit stub-managed registers Daniel Santos
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:23 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

For the sake of clarity, I've separated out these minor refactoring
changes from the remainder of this patch set.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 21 ++++++++++-----------
 gcc/config/i386/i386.h |  4 +++-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e8a4ba6fe8d..113f83742c2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2424,7 +2424,7 @@ static int const x86_64_int_return_registers[4] =
 
 /* Additional registers that are clobbered by SYSV calls.  */
 
-int const x86_64_ms_sysv_extra_clobbered_registers[12] =
+unsigned const x86_64_ms_sysv_extra_clobbered_registers[12] =
 {
   SI_REG, DI_REG,
   XMM6_REG, XMM7_REG,
@@ -12539,6 +12539,7 @@ ix86_builtin_setjmp_frame_value (void)
 static void
 ix86_compute_frame_layout (struct ix86_frame *frame)
 {
+  struct machine_function *m = cfun->machine;
   unsigned HOST_WIDE_INT stack_alignment_needed;
   HOST_WIDE_INT offset;
   unsigned HOST_WIDE_INT preferred_alignment;
@@ -12573,19 +12574,19 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
      scheduling that can be done, which means that there's very little point
      in doing anything except PUSHs.  */
   if (TARGET_SEH)
-    cfun->machine->use_fast_prologue_epilogue = false;
+    m->use_fast_prologue_epilogue = false;
 
   /* During reload iteration the amount of registers saved can change.
      Recompute the value as needed.  Do not recompute when amount of registers
      didn't change as reload does multiple calls to the function and does not
      expect the decision to change within single iteration.  */
   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
-           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+	   && m->use_fast_prologue_epilogue_nregs != frame->nregs)
     {
       int count = frame->nregs;
       struct cgraph_node *node = cgraph_node::get (current_function_decl);
 
-      cfun->machine->use_fast_prologue_epilogue_nregs = count;
+      m->use_fast_prologue_epilogue_nregs = count;
 
       /* The fast prologue uses move instead of push to save registers.  This
          is significantly longer, but also executes faster as modern hardware
@@ -12602,14 +12603,14 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
       if (node->frequency < NODE_FREQUENCY_NORMAL
 	  || (flag_branch_probabilities
 	      && node->frequency < NODE_FREQUENCY_HOT))
-        cfun->machine->use_fast_prologue_epilogue = false;
+	m->use_fast_prologue_epilogue = false;
       else
-        cfun->machine->use_fast_prologue_epilogue
+	m->use_fast_prologue_epilogue
 	   = !expensive_function_p (count);
     }
 
   frame->save_regs_using_mov
-    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
+    = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
        /* If static stack checking is enabled and done with probes,
 	  the registers need to be saved before allocating the frame.  */
        && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
@@ -28683,11 +28684,9 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
   else if (TARGET_64BIT_MS_ABI
 	   && (!callarg2 || INTVAL (callarg2) != -2))
     {
-      int const cregs_size
-	= ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
-      int i;
+      unsigned i;
 
-      for (i = 0; i < cregs_size; i++)
+      for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
 	{
 	  int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
 	  machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4e4cb7ca7e3..645b239a768 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2163,7 +2163,9 @@ extern int const dbx_register_map[FIRST_PSEUDO_REGISTER];
 extern int const dbx64_register_map[FIRST_PSEUDO_REGISTER];
 extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
 
-extern int const x86_64_ms_sysv_extra_clobbered_registers[12];
+extern unsigned const x86_64_ms_sysv_extra_clobbered_registers[12];
+#define NUM_X86_64_MS_CLOBBERED_REGS \
+  (ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers))
 
 /* Before the prologue, RA is at 0(%esp).  */
 #define INCOMING_RETURN_ADDR_RTX \
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 07/12] [i386] Modify ix86_save_reg to optionally omit stub-managed registers
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (9 preceding siblings ...)
  2017-04-27  8:23 ` [PATCH 04/12] [i386] Minor refactoring Daniel Santos
@ 2017-04-27  8:44 ` Daniel Santos
  2017-04-27  8:51 ` [PATCH 06/12] [i386] Add class xlogue_layout and new fields to struct machine_function Daniel Santos
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:44 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Add HARD_REG_SET stub_managed_regs to track which registers will be
managed by the pro/epilogue stubs for the function.

Add a third parameter, bool ignore_outlined, to ix86_save_reg to specify
whether or not registers marked in stub_managed_regs should be
included.  All call sites are modified.
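
For illustration, the two uses of the new parameter look roughly like
this (a sketch drawn from the hunks below, not new code):

  /* Counting regs that still need inline saves: skip stub-managed ones.  */
  if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    nregs++;

  /* Asking whether a reg is saved at all, e.g. when picking a scratch
     register on entry: do not skip stub-managed ones.  */
  if (ix86_save_reg (BX_REG, true, false))
    regno = BX_REG;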

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2da3da1f97a..4f0cb7dd6cc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12618,6 +12618,10 @@ ix86_hard_regno_scratch_ok (unsigned int regno)
 	      && df_regs_ever_live_p (regno)));
 }
 
+/* Registers whose save and restore will be managed by stubs called from
+   the pro/epilogue.  */
+static HARD_REG_SET GTY(()) stub_managed_regs;
+
 /* Return true if register class CL should be an additional allocno
    class.  */
 
@@ -12630,7 +12634,7 @@ ix86_additional_allocno_class_p (reg_class_t cl)
 /* Return TRUE if we need to save REGNO.  */
 
 static bool
-ix86_save_reg (unsigned int regno, bool maybe_eh_return)
+ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
 {
   /* If there are no caller-saved registers, we preserve all registers,
      except for MMX and x87 registers which aren't supported when saving
@@ -12698,6 +12702,10 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return)
 	}
     }
 
+  if (ignore_outlined && cfun->machine->call_ms2sysv
+      && in_hard_reg_set_p (stub_managed_regs, DImode, regno))
+    return false;
+
   if (crtl->drap_reg
       && regno == REGNO (crtl->drap_reg)
       && !cfun->machine->no_drap_save_restore)
@@ -12718,7 +12726,7 @@ ix86_nsaved_regs (void)
   int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       nregs ++;
   return nregs;
 }
@@ -12734,7 +12742,7 @@ ix86_nsaved_sseregs (void)
   if (!TARGET_64BIT_MS_ABI)
     return 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       nregs ++;
   return nregs;
 }
@@ -12814,6 +12822,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
 
   frame->nregs = ix86_nsaved_regs ();
   frame->nsseregs = ix86_nsaved_sseregs ();
+  CLEAR_HARD_REG_SET (stub_managed_regs);
 
   /* 64-bit MS ABI seem to require stack alignment to be always 16,
      except for function prologues, leaf functions and when the defult
@@ -13207,7 +13216,7 @@ ix86_emit_save_regs (void)
   rtx_insn *insn;
 
   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       {
 	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
 	RTX_FRAME_RELATED_P (insn) = 1;
@@ -13297,7 +13306,7 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       {
         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
 	cfa_offset -= UNITS_PER_WORD;
@@ -13312,7 +13321,7 @@ ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       {
 	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
@@ -13696,13 +13705,13 @@ get_scratch_register_on_entry (struct scratch_reg *sr)
 	       && !static_chain_p
 	       && drap_regno != CX_REG)
 	regno = CX_REG;
-      else if (ix86_save_reg (BX_REG, true))
+      else if (ix86_save_reg (BX_REG, true, false))
 	regno = BX_REG;
       /* esi is the static chain register.  */
       else if (!(regparm == 3 && static_chain_p)
-	       && ix86_save_reg (SI_REG, true))
+	       && ix86_save_reg (SI_REG, true, false))
 	regno = SI_REG;
-      else if (ix86_save_reg (DI_REG, true))
+      else if (ix86_save_reg (DI_REG, true, false))
 	regno = DI_REG;
       else
 	{
@@ -14812,7 +14821,7 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
       {
 	rtx reg = gen_rtx_REG (word_mode, regno);
 	rtx mem;
@@ -14851,7 +14860,7 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
       {
 	rtx reg = gen_rtx_REG (V4SFmode, regno);
 	rtx mem;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 06/12] [i386] Add class xlogue_layout and new fields to struct machine_function
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (10 preceding siblings ...)
  2017-04-27  8:44 ` [PATCH 07/12] [i386] Modify ix86_save_reg to optionally omit stub-managed registers Daniel Santos
@ 2017-04-27  8:51 ` Daniel Santos
  2017-04-27 18:32 ` [PATCH v4 0/12 GCC8] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27  8:51 UTC (permalink / raw)
  To: gcc-patches, Uros Bizjak, Jan Hubicka

Of the new fields added to struct machine_function, call_ms2sysv is
initially set in ix86_expand_call, but may later be cleared when
ix86_compute_frame_layout is called (both of these changes are in
subsequent patches).  If it is not cleared, then the remaining new
fields will be set in ix86_compute_frame_layout (also a subsequent patch).

The new class xlogue_layout manages the layout of the stack area used by
the out-of-line save & restore stubs as well as any padding needed
before and after the save area.  It also provides the proper symbol rtx
for the requested stub based upon values of the new fields in struct
machine_function, which specify how many registers are being saved, what
padding is needed, etc.

xlogue_layout cannot be used until stack realign flags are finalized and
ix86_compute_frame_layout is called, at which point
xlogue_layout::get_instance may be used to retrieve the appropriate
(constant) instance of xlogue_layout.
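
Once valid, later patches are expected to use it along these lines (a
short usage sketch only):

  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
  rtx sym = xlogue.get_stub_rtx (XLOGUE_STUB_SAVE);
  HOST_WIDE_INT space = xlogue.get_stack_space_used ();
  HOST_WIDE_INT base_off = xlogue.get_stub_ptr_offset ();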

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/i386.h |  18 ++++
 2 files changed, 280 insertions(+)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 521116195cb..2da3da1f97a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -93,6 +93,7 @@ static rtx legitimize_dllimport_symbol (rtx, bool);
 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
 static rtx legitimize_pe_coff_symbol (rtx, bool);
 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
+static bool ix86_save_reg (unsigned int, bool, bool);
 
 #ifndef CHECK_STACK_LIMIT
 #define CHECK_STACK_LIMIT (-1)
@@ -2432,6 +2433,267 @@ unsigned const x86_64_ms_sysv_extra_clobbered_registers[12] =
   XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
 };
 
+enum xlogue_stub {
+  XLOGUE_STUB_SAVE,
+  XLOGUE_STUB_RESTORE,
+  XLOGUE_STUB_RESTORE_TAIL,
+  XLOGUE_STUB_SAVE_HFP,
+  XLOGUE_STUB_RESTORE_HFP,
+  XLOGUE_STUB_RESTORE_HFP_TAIL,
+
+  XLOGUE_STUB_COUNT
+};
+
+enum xlogue_stub_sets {
+  XLOGUE_SET_ALIGNED,
+  XLOGUE_SET_ALIGNED_PLUS_8,
+  XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
+  XLOGUE_SET_HFP_ALIGNED_PLUS_8,
+
+  XLOGUE_SET_COUNT
+};
+
+/* Register save/restore layout used by out-of-line stubs.  */
+class xlogue_layout {
+public:
+  struct reginfo
+  {
+    unsigned regno;
+    HOST_WIDE_INT offset;	/* Offset used by stub base pointer (rax or
+				   rsi) to where each register is stored.  */
+  };
+
+  unsigned get_nregs () const			{return m_nregs;}
+  HOST_WIDE_INT get_stack_align_off_in () const	{return m_stack_align_off_in;}
+
+  const reginfo &get_reginfo (unsigned reg) const
+  {
+    gcc_assert (reg < m_nregs);
+    return m_regs[reg];
+  }
+
+  const char *get_stub_name (enum xlogue_stub stub,
+			     unsigned n_extra_args) const;
+  /* Returns an rtx for the stub's symbol based upon
+       1.) the specified stub (save, restore or restore_ret) and
+       2.) the value of cfun->machine->call_ms2sysv_extra_regs and
+       3.) whether or not stack alignment is being performed.  */
+  rtx get_stub_rtx (enum xlogue_stub stub) const;
+
+  /* Returns the amount of stack space (including padding) that the stub
+     needs to store registers based upon data in the machine_function.  */
+  HOST_WIDE_INT get_stack_space_used () const
+  {
+    const struct machine_function &m = *cfun->machine;
+    unsigned last_reg = m.call_ms2sysv_extra_regs + MIN_REGS - 1;
+
+    gcc_assert (m.call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
+    return m_regs[last_reg].offset
+	    + (m.call_ms2sysv_pad_out ? 8 : 0)
+	    + STUB_INDEX_OFFSET;
+  }
+
+  /* Returns the offset for the base pointer used by the stub.  */
+  HOST_WIDE_INT get_stub_ptr_offset () const
+  {
+    return STUB_INDEX_OFFSET + m_stack_align_off_in;
+  }
+
+  static const struct xlogue_layout &get_instance ();
+  static unsigned compute_stub_managed_regs (HARD_REG_SET &stub_managed_regs);
+
+  static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
+  static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
+  static const unsigned MAX_REGS = 18;
+  static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
+  static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
+  static const unsigned STUB_NAME_MAX_LEN = 16;
+  static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
+  static const unsigned REG_ORDER[MAX_REGS];
+  static const unsigned REG_ORDER_REALIGN[MAX_REGS];
+
+private:
+  xlogue_layout ();
+  xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
+  xlogue_layout (const xlogue_layout &);
+
+  /* True if hard frame pointer is used.  */
+  bool m_hfp;
+
+  /* Max number of registers this layout manages.  */
+  unsigned m_nregs;
+
+  /* Incoming offset from 16-byte alignment.  */
+  HOST_WIDE_INT m_stack_align_off_in;
+
+  /* Register order and offsets.  */
+  struct reginfo m_regs[MAX_REGS];
+
+  /* Lazy-inited cache of symbol names for stubs.  */
+  char m_stub_names[XLOGUE_STUB_COUNT][VARIANT_COUNT][STUB_NAME_MAX_LEN];
+
+  static const struct xlogue_layout GTY(()) s_instances[XLOGUE_SET_COUNT];
+};
+
+const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
+  "savms64",
+  "resms64",
+  "resms64x",
+  "savms64f",
+  "resms64f",
+  "resms64fx"
+};
+
+const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
+/* The below offset values are where each register is stored for the layout
+   relative to incoming stack pointer.  The value of each m_regs[].offset will
+   be relative to the incoming base pointer (rax or rsi) used by the stub.
+
+    s_instances:   0		1		2		3
+    Offset:					realigned or	aligned + 8
+    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP	*/
+    XMM15_REG,	/* 0x10		0x18		0x10		0x18	*/
+    XMM14_REG,	/* 0x20		0x28		0x20		0x28	*/
+    XMM13_REG,	/* 0x30		0x38		0x30		0x38	*/
+    XMM12_REG,	/* 0x40		0x48		0x40		0x48	*/
+    XMM11_REG,	/* 0x50		0x58		0x50		0x58	*/
+    XMM10_REG,	/* 0x60		0x68		0x60		0x68	*/
+    XMM9_REG,	/* 0x70		0x78		0x70		0x78	*/
+    XMM8_REG,	/* 0x80		0x88		0x80		0x88	*/
+    XMM7_REG,	/* 0x90		0x98		0x90		0x98	*/
+    XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8	*/
+    SI_REG,	/* 0xa8		0xb0		0xa8		0xb0	*/
+    DI_REG,	/* 0xb0		0xb8		0xb0		0xb8	*/
+    BX_REG,	/* 0xb8		0xc0		0xb8		0xc0	*/
+    BP_REG,	/* 0xc0		0xc8		N/A		N/A	*/
+    R12_REG,	/* 0xc8		0xd0		0xc0		0xc8	*/
+    R13_REG,	/* 0xd0		0xd8		0xc8		0xd0	*/
+    R14_REG,	/* 0xd8		0xe0		0xd0		0xd8	*/
+    R15_REG,	/* 0xe0		0xe8		0xd8		0xe0	*/
+};
+
+/* Instantiates all xlogue_layout instances.  */
+const struct xlogue_layout GTY(())
+xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
+  xlogue_layout (0, false),
+  xlogue_layout (8, false),
+  xlogue_layout (0, true),
+  xlogue_layout (8, true)
+};
+
+/* Return an appropriate const instance of xlogue_layout based upon values
+   in cfun->machine and crtl.  */
+const struct xlogue_layout &xlogue_layout::get_instance ()
+{
+  enum xlogue_stub_sets stub_set;
+  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
+
+  if (stack_realign_fp)
+    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
+  else if (frame_pointer_needed)
+    stub_set = aligned_plus_8
+	      ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
+	      : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
+  else
+    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
+
+  return s_instances[stub_set];
+}
+
+/* Determine which clobbered registers can be saved by the stub and store
+   them in stub_managed_regs.  Returns the count of registers the stub will
+   save and restore.  */
+unsigned
+xlogue_layout::compute_stub_managed_regs (HARD_REG_SET &stub_managed_regs)
+{
+  bool hfp = frame_pointer_needed || stack_realign_fp;
+
+  unsigned i, count;
+  unsigned regno;
+
+  for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; ++i)
+    {
+      regno = x86_64_ms_sysv_extra_clobbered_registers[i];
+      if (regno == BP_REG && hfp)
+	continue;
+      if (!ix86_save_reg (regno, false, false))
+	return 0;
+    }
+
+  for (count = i = 0; i < MAX_REGS; ++i)
+    {
+      regno = REG_ORDER[i];
+      if (regno == BP_REG && hfp)
+	continue;
+      if (!ix86_save_reg (regno, false, false))
+	break;
+      add_to_hard_reg_set (&stub_managed_regs, DImode, regno);
+      ++count;
+    }
+    gcc_assert (count >= MIN_REGS && count <= MAX_REGS);
+    return count;
+}
+
+/* Constructor for xlogue_layout.  */
+xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
+  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
+    m_stack_align_off_in (stack_align_off_in)
+{
+  memset (m_regs, 0, sizeof (m_regs));
+  memset (m_stub_names, 0, sizeof (m_stub_names));
+
+  HOST_WIDE_INT offset = stack_align_off_in;
+  unsigned i, j;
+  for (i = j = 0; i < MAX_REGS; ++i)
+    {
+      unsigned regno = REG_ORDER[i];
+
+      if (regno == BP_REG && hfp)
+	continue;
+      if (SSE_REGNO_P (regno))
+	{
+	  offset += 16;
+	  /* Verify that SSE regs are always aligned.  */
+	  gcc_assert (!((stack_align_off_in + offset) & 15));
+	}
+      else
+	offset += 8;
+
+      m_regs[j].regno    = regno;
+      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
+    }
+    gcc_assert (j == m_nregs);
+}
+
+const char *xlogue_layout::get_stub_name (enum xlogue_stub stub,
+					  unsigned n_extra_regs) const
+{
+  xlogue_layout *writey_this = const_cast<xlogue_layout*>(this);
+  char *name = writey_this->m_stub_names[stub][n_extra_regs];
+
+  /* Lazy init */
+  if (!*name)
+    {
+      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%u",
+			  STUB_BASE_NAMES[stub], MIN_REGS + n_extra_regs);
+      gcc_checking_assert (res <= (int)STUB_NAME_MAX_LEN);
+    }
+
+  return name;
+}
+
+/* Return rtx of a symbol ref for the entry point (based upon
+   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
+rtx xlogue_layout::get_stub_rtx (enum xlogue_stub stub) const
+{
+  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
+  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
+  gcc_assert (stub < XLOGUE_STUB_COUNT);
+  gcc_assert (crtl->stack_realign_finalized);
+
+  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
+}
+
 /* Define the structure for the machine field in struct function.  */
 
 struct GTY(()) stack_local_entry {
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 645b239a768..5366f1fc88f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2578,6 +2578,24 @@ struct GTY(()) machine_function {
      pass arguments and can be used for indirect sibcall.  */
   BOOL_BITFIELD arg_reg_available : 1;
 
+  /* If true, we're out-of-lining reg save/restore for regs clobbered
+     by ms_abi functions calling a sysv function.  */
+  BOOL_BITFIELD call_ms2sysv : 1;
+
+  /* If true, the incoming 16-byte aligned stack has an offset (of 8) and
+     needs padding.  */
+  BOOL_BITFIELD call_ms2sysv_pad_in : 1;
+
+  /* If true, the size of the stub save area plus inline int reg saves will
+     result in an 8 byte offset, so needs padding.  */
+  BOOL_BITFIELD call_ms2sysv_pad_out : 1;
+
+  /* This is the number of extra registers saved by stub (valid range is
+     0-6). Each additional register is only saved/restored by the stubs
+     if all successive ones are. (Will always be zero when using a hard
+     frame pointer.) */
+  unsigned int call_ms2sysv_extra_regs:3;
+
   /* During prologue/epilogue generation, the current frame state.
      Otherwise, the frame state at the end of the prologue.  */
   struct machine_frame_state fs;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12 GCC8] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (11 preceding siblings ...)
  2017-04-27  8:51 ` [PATCH 06/12] [i386] Add class xlogue_layout and new fields to struct machine_function Daniel Santos
@ 2017-04-27 18:32 ` Daniel Santos
  2017-05-01 11:31 ` [PATCH v4 0/12] " Uros Bizjak
  2017-05-13  0:01 ` [PING] " Daniel Santos
  14 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-27 18:32 UTC (permalink / raw)
  To: gcc-patches

I probably should have mentioned that these are all for GCC8.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues
  2017-04-27  8:05 ` [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues Daniel Santos
@ 2017-04-28  6:00   ` Sandra Loosemore
  2017-04-28  7:37     ` [PATCH 05/12 rev 1] " Daniel Santos
  0 siblings, 1 reply; 41+ messages in thread
From: Sandra Loosemore @ 2017-04-28  6:00 UTC (permalink / raw)
  To: Daniel Santos, gcc-patches, Uros Bizjak, Jan Hubicka,
	Gerald Pfeifer, Joseph Myers

On 04/27/2017 02:09 AM, Daniel Santos wrote:

> @@ -25308,6 +25308,17 @@ You can control this behavior for specific functions by
>   using the function attributes @code{ms_abi} and @code{sysv_abi}.
>   @xref{Function Attributes}.
>
> +@item -mcall-ms2sysv-xlogues
> +@opindex mcall-ms2sysv-xlogues
> +@opindex mno-call-ms2sysv-xlogues
> +Due to differences in 64-bit ABIs, any Microsoft ABI function that calls a
> +System V ABI function must consider RSI, RDI and XMM6-15 as clobbered.  By
> +default, the code for saving and restoring these registers is emitted inline,
> +resulting in fairly lengthy prologues and epilogues.  Using
> +@option{-mcall-ms2sysv-xlogues} emits prologues and epilogues that
> +use stubs in the static portion of libgcc to perform these saves & restores,

s/&/and/

The documentation part is OK with that change.

-Sandra

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 05/12 rev 1] [i386] Add option -mcall-ms2sysv-xlogues
  2017-04-28  6:00   ` Sandra Loosemore
@ 2017-04-28  7:37     ` Daniel Santos
  0 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-04-28  7:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: Sandra Loosemore, Uros Bizjak, Jan Hubicka

Oops.  I blame my fingers.  :)

Daniel
---
 gcc/config/i386/i386.c   |  6 +++++-
 gcc/config/i386/i386.opt |  4 ++++
 gcc/doc/invoke.texi      | 13 ++++++++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 113f83742c2..521116195cb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4508,7 +4508,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
     { "-mstv",				MASK_STV },
     { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
     { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
-    { "-mprefer-avx128",		MASK_PREFER_AVX128 }
+    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
+    { "-mcall-ms2sysv-xlogues",		MASK_CALL_MS2SYSV_XLOGUES }
   };
 
   /* Additional flag options.  */
@@ -6319,6 +6320,9 @@ ix86_option_override_internal (bool main_args_p,
 #endif
    }
 
+  if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
+    sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
+
   if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9384e29b1de..65b228544a5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -538,6 +538,10 @@ Enum(calling_abi) String(sysv) Value(SYSV_ABI)
 EnumValue
 Enum(calling_abi) String(ms) Value(MS_ABI)
 
+mcall-ms2sysv-xlogues
+Target Report Mask(CALL_MS2SYSV_XLOGUES) Save
+Use libgcc stubs to save and restore registers clobbered by 64-bit Microsoft to System V ABI calls.
+
 mveclibabi=
 Target RejectNegative Joined Var(ix86_veclibabi_type) Enum(ix86_veclibabi) Init(ix86_veclibabi_type_none)
 Vector library ABI to use.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0eeea7b3b87..d9894f37ee5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1209,7 +1209,7 @@ See RS/6000 and PowerPC Options.
 -msse2avx  -mfentry  -mrecord-mcount  -mnop-mcount  -m8bit-idiv @gol
 -mavx256-split-unaligned-load  -mavx256-split-unaligned-store @gol
 -malign-data=@var{type}  -mstack-protector-guard=@var{guard} @gol
--mmitigate-rop  -mgeneral-regs-only}
+-mmitigate-rop  -mgeneral-regs-only  -mcall-ms2sysv-xlogues}
 
 @emph{x86 Windows Options}
 @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
@@ -25308,6 +25308,17 @@ You can control this behavior for specific functions by
 using the function attributes @code{ms_abi} and @code{sysv_abi}.
 @xref{Function Attributes}.
 
+@item -mcall-ms2sysv-xlogues
+@opindex mcall-ms2sysv-xlogues
+@opindex mno-call-ms2sysv-xlogues
+Due to differences in 64-bit ABIs, any Microsoft ABI function that calls a
+System V ABI function must consider RSI, RDI and XMM6-15 as clobbered.  By
+default, the code for saving and restoring these registers is emitted inline,
+resulting in fairly lengthy prologues and epilogues.  Using
+@option{-mcall-ms2sysv-xlogues} emits prologues and epilogues that
+use stubs in the static portion of libgcc to perform these saves and restores,
+thus reducing function size at the cost of a few extra instructions.
+
 @item -mtls-dialect=@var{type}
 @opindex mtls-dialect
 Generate code to access thread-local storage using the @samp{gnu} or
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-04-27  8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
@ 2017-05-01 11:18   ` Uros Bizjak
  2017-05-02 22:19     ` Daniel Santos
  2017-05-04 21:35   ` [PATCH 09/12 rev1] [i386] Add patterns and predicates mcall-ms2sysv-xlogues Daniel Santos
  1 sibling, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-05-01 11:18 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches, Jan Hubicka

On Thu, Apr 27, 2017 at 10:09 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> Adds the predicates save_multiple and restore_multiple to predicates.md,
> which are used by following patterns in sse.md:
>
> * save_multiple - insn that calls a save stub
> * restore_multiple - call_insn that calls a save stub and returns to the
>   function to allow a sibling call (which should typically offer better
>   optimization than the restore stub as the tail call)
> * restore_multiple_and_return - a jump_insn that returns from the
>   function as a tail-call.
> * restore_multiple_leave_return - like the above, but restores the frame
>   pointer before returning.
>
> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
> ---
>  gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
>  gcc/config/i386/sse.md        |  37 ++++++++++
>  2 files changed, 192 insertions(+)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 8f250a2e720..36fe8abc3f4 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1657,3 +1657,158 @@
>    (ior (match_operand 0 "register_operand")
>         (and (match_code "const_int")
>             (match_test "op == constm1_rtx"))))
> +
> +;; Return true if:
> +;; 1. first op is a symbol reference,
> +;; 2. >= 13 operands, and
> +;; 3. operands 2 to end is one of:
> +;;   a. save a register to a memory location, or
> +;;   b. restore stack pointer.
> +(define_predicate "save_multiple"
> +  (match_code "parallel")
> +{
> +  const unsigned nregs = XVECLEN (op, 0);
> +  rtx head = XVECEXP (op, 0, 0);
> +  unsigned i;
> +
> +  if (GET_CODE (head) != USE)
> +    return false;
> +  else
> +    {
> +      rtx op0 = XEXP (head, 0);
> +      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
> +       return false;
> +    }
> +
> +  if (nregs < 13)
> +    return false;
> +
> +  for (i = 2; i < nregs; i++)
> +    {
> +      rtx e, src, dest;
> +
> +      e = XVECEXP (op, 0, i);
> +
> +      switch (GET_CODE (e))
> +       {
> +         case SET:
> +           src  = SET_SRC (e);
> +           dest = SET_DEST (e);
> +
> +           /* storing a register to memory.  */
> +           if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)

Please use REG_P (...) and MEM_P (...) - and possibly others -
predicates in the code.
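
For instance, the register/memory check could then read (sketch only):

	   /* storing a register to memory.  */
	   if (REG_P (src) && MEM_P (dest))
	     {
	       rtx addr = XEXP (dest, 0);

	       /* Good if dest address is RAX or an offset of RAX.  */
	       if (REG_P (addr) && REGNO (addr) == AX_REG)
		 continue;
	       if (GET_CODE (addr) == PLUS
		   && REG_P (XEXP (addr, 0))
		   && REGNO (XEXP (addr, 0)) == AX_REG)
		 continue;
	     }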

> +             {
> +               rtx addr = XEXP (dest, 0);
> +
> +               /* Good if dest address is in RAX.  */
> +               if (GET_CODE (addr) == REG
> +                   && REGNO (addr) == AX_REG)
> +                 continue;
> +
> +               /* Good if dest address is offset of RAX.  */
> +               if (GET_CODE (addr) == PLUS
> +                   && GET_CODE (XEXP (addr, 0)) == REG
> +                   && REGNO (XEXP (addr, 0)) == AX_REG)
> +                 continue;
> +             }
> +           break;
> +
> +         default:
> +           break;
> +       }
> +       return false;
> +    }
> +  return true;
> +})
> +
> +;; Return true if:
> +;; * first op is (return) or a use (symbol reference),
> +;; * >= 14 operands, and
> +;; * operands 2 to end are one of:
> +;;   - restoring a register from a memory location that's an offset of RSI.
> +;;   - clobbering a reg
> +;;   - adjusting SP
> +(define_predicate "restore_multiple"
> +  (match_code "parallel")
> +{
> +  const unsigned nregs = XVECLEN (op, 0);
> +  rtx head = XVECEXP (op, 0, 0);
> +  unsigned i;
> +
> +  switch (GET_CODE (head))
> +    {
> +      case RETURN:
> +       i = 3;
> +       break;
> +
> +      case USE:
> +      {
> +       rtx op0 = XEXP (head, 0);
> +
> +       if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
> +         return false;
> +
> +       i = 1;
> +       break;
> +      }
> +
> +      default:
> +       return false;
> +    }
> +
> +  if (nregs < i + 12)
> +    return false;
> +
> +  for (; i < nregs; i++)
> +    {
> +      rtx e, src, dest;
> +
> +      e = XVECEXP (op, 0, i);
> +
> +      switch (GET_CODE (e))
> +       {
> +         case CLOBBER:
> +           continue;

I don't see where CLOBBER is generated in ix86_emit_outlined_ms2sysv_restore.

> +
> +         case SET:
> +           src  = SET_SRC (e);
> +           dest = SET_DEST (e);
> +
> +           /* Restoring a register from memory.  */
> +           if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
> +             {
> +               rtx addr = XEXP (src, 0);
> +
> +               /* Good if src address is in RSI.  */
> +               if (GET_CODE (addr) == REG
> +                   && REGNO (addr) == SI_REG)
> +                 continue;
> +
> +               /* Good if src address is offset of RSI.  */
> +               if (GET_CODE (addr) == PLUS
> +                   && GET_CODE (XEXP (addr, 0)) == REG
> +                   && REGNO (XEXP (addr, 0)) == SI_REG)
> +                 continue;
> +
> +               /* Good if adjusting stack pointer.  */
> +               if (GET_CODE (dest) == REG
> +                   && REGNO (dest) == SP_REG
> +                   && GET_CODE (src) == PLUS
> +                   && GET_CODE (XEXP (src, 0)) == REG
> +                   && REGNO (XEXP (src, 0)) == SP_REG)
> +                 continue;
> +             }
> +
> +           /* Restoring stack pointer from another register.  */
> +           if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
> +               && GET_CODE (src) == REG)
> +             continue;
> +           break;
> +
> +         default:
> +           break;
> +       }
> +       return false;
> +    }
> +  return true;
> +})

I think that the above functions should check only whether the function is
storing/restoring the correct registers; all other RTXes should be
written explicitly in the insn patterns.

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index e8ccb1e10c3..c9fe7274def 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -19997,3 +19997,40 @@
>            (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512VPOPCNTDQ"
>    "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
> +
> +;; Save multiple registers out-of-line.
> +(define_insn "save_multiple<mode>"
> +  [(match_parallel 0 "save_multiple"
> +    [(use (match_operand:P 1 "symbol_operand"))
> +     (const_int 0)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "call\t%P1")

You probably don't need a (const_int 0) tag. According to the
documentation, RTX matching guarantees subexpression match, so in the
predicate you should check only stores of registers (as suggested
above).

> +;; Restore multiple registers out-of-line.
> +(define_insn "restore_multiple<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(use (match_operand:P 1 "symbol_operand"))])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "call\t%P1")
> +
> +;; Restore multiple registers out-of-line and return.
> +(define_insn "restore_multiple_and_return<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(return)
> +     (use (match_operand:P 1 "symbol_operand"))
> +     (const_int 0)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "jmp\t%P1")

No need for (const_int 0) tag.

> +;; Restore multiple registers out-of-line when hard frame pointer is used,
> +;; perform the leave operation prior to returning (from the function).
> +(define_insn "restore_multiple_leave_return<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(return)
> +     (use (match_operand:P 1 "symbol_operand"))
> +     (const_int 1)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "jmp\t%P1")

You will have to write out all sub-RTXes of the "leave" pattern,
including the clobber.

I'd recommend that in the predicate you check the match_parallel from the
bottom up, since the subexpressions at the top are already matched, and
you can have a different number of subexpressions at the top.
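
Something along these lines (only a sketch of the idea, untested):

  /* Walk the PARALLEL from the end: every trailing element must be a
     store of a register to memory (the check that the address is
     RAX-based is elided in this sketch).  The leading elements, such as
     the USE of the stub symbol, are left to the explicit sub-RTXes of
     the insn pattern.  */
  for (int i = XVECLEN (op, 0) - 1; i >= 2; i--)
    {
      rtx e = XVECEXP (op, 0, i);
      if (GET_CODE (e) != SET
	  || !REG_P (SET_SRC (e))
	  || !MEM_P (SET_DEST (e)))
	return false;
    }
  return true;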

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (12 preceding siblings ...)
  2017-04-27 18:32 ` [PATCH v4 0/12 GCC8] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
@ 2017-05-01 11:31 ` Uros Bizjak
  2017-05-02 10:25   ` JonY
  2017-05-13  0:01 ` [PING] " Daniel Santos
  14 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-05-01 11:31 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches, Jan Hubicka

On Thu, Apr 27, 2017 at 10:04 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> All of patches are concerned with 64-bit Microsoft ABI functions that call
> System V ABI function which clobbers RSI, RDI and XMM6-15 and are aimed at
> improving performance and .text size of Wine 64. I had previously submitted
> these as separate patch sets, but have combined them for simplicity. (Does
> this make the ChangeLogs too big? Please let me know if you want me to break
> these back apart.) Below are the included patchsets and a summary of changes
> since the previous post(s):

Well, the ChangeLog is acceptable.

I have comments on how new RTX patterns are generated and checked
(patches 9/12 and 11/12). Other patches look good to me, so after
issues with 9/12 and 11/12 are resolved, I think the patch set is
ready to go.

After the above issue is addressed, I propose to move forward by
committing the patchset and resolving any possible issues later. There
are just too many code paths in the stack frame construction and
teardown to notice all possible interactions between new and old code.
It looks like existing code won't be affected without activating the new
option, so we can be a bit less cautious with the patchset. An
important part is thus a comprehensive added test suite, which seems
to pass.

I also assume that Cygwin and MinGW people agree with the patch and
the functionality itself.

Uros.

> 1.) PR78962 Use aligned SSE movs for re-aligned MS ABI pro/epilogues.
> https://gcc.gnu.org/ml/gcc-patches/2016-12/msg01859.html
>
> Changes:
>
>  * The SEH unwind emit code (in winnt.c) does not currently support
>    CFA_REG_EXPRESSION, which is required to make this work, so I have
>    disabled it on SEH targets.
>  * Updated comments on CFA_REG_EXPRESSION in winnt.c.
>
>
> 2.) Add option to call out-of-line stubs instead of emitting inline saves
> and restores. https://gcc.gnu.org/ml/gcc-patches/2017-02/msg00548.html
>
> Changes:
>
>  * Renamed option from -moutline-msabi-xlogues to -mcall-ms2sysv-xlogues
>  * Since this patch set depends upon aligned SSE MOVs after stack
>    realignment, I have disabled it on SEH targets with a sorry().
>  * I was previously trying to cache the rtx for symbols to the libgcc
>    stubs instead of creating new ones, but this caused problems in
>    subsequent passes and it was disabled with a "TODO" comment. I have
>    removed this code, as well as the rtx cache that was just wasting
>    memory in class xlogue_layout.
>  * Improved comment documentation.
>
>
> 3.) A comprehensive test program to validate correct behavior in these pro-
> and epilogues. https://gcc.gnu.org/ml/gcc-patches/2017-02/msg00542.html
>
> Changes:
>
>  * The previous version repeated all tests for each -j<jobs> instead of
>    running in parallel. I have fixed this implementing a primitive but
>    effective file-based parallelization scheme.
>  * I noticed that there was gcc/testsuite/gcc.target/x86_64/abi
>    directory for tests specific to testing 64-bit abi issues, so I've
>    moved my tests to an "ms-sysv" subdirectory of that (instead of
>    gcc/testsuite/gcc.target/i386/msabi).
>  * Fixed breakages on Cygwin.
>  * Corrected a bad "_noinfo" optimization barrier (function call by
>    volatile pointer).
>  * Minor cleanup/improvements.
>
>
>  gcc/Makefile.in                                    |   2 +
>  gcc/config/i386/i386.c                             | 916
> +++++++++++++++++++--
>  gcc/config/i386/i386.h                             |  33 +-
>  gcc/config/i386/i386.opt                           |   4 +
>  gcc/config/i386/predicates.md                      | 155 ++++
>  gcc/config/i386/sse.md                             |  37 +
>  gcc/config/i386/winnt.c                            |   3 +-
>  gcc/doc/invoke.texi                                |  13 +-
>  .../gcc.target/x86_64/abi/ms-sysv/do-test.S        | 163 ++++
>  gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc | 807 ++++++++++++++++++
>  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.c        | 373 +++++++++
>  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp      | 178 ++++
>  libgcc/config.host                                 |   2 +-
>  libgcc/config/i386/i386-asm.h                      |  82 ++
>  libgcc/config/i386/resms64.S                       |  57 ++
>  libgcc/config/i386/resms64f.S                      |  55 ++
>  libgcc/config/i386/resms64fx.S                     |  57 ++
>  libgcc/config/i386/resms64x.S                      |  59 ++
>  libgcc/config/i386/savms64.S                       |  57 ++
>  libgcc/config/i386/savms64f.S                      |  55 ++
>  libgcc/config/i386/t-msabi                         |   7 +
>  21 files changed, 3020 insertions(+), 95 deletions(-)
>
>
> gcc/ChangeLog:
>
> 2017-04-25  Daniel Santos<daniel.santos@pobox.com>
>
>         * config/i386/i386.opt: Add option -mcall-ms2sysv-xlogues.
>         * config/i386/i386.h
>         (x86_64_ms_sysv_extra_clobbered_registers): Change type to unsigned.
>         (NUM_X86_64_MS_CLOBBERED_REGS): New macro.
>         (struct machine_function): Add new members call_ms2sysv,
>         call_ms2sysv_pad_in, call_ms2sysv_pad_out and
> call_ms2sysv_extra_regs.
>         (struct machine_frame_state): New fields sp_realigned and
>         sp_realigned_offset.
>         * config/i386/i386.c
>         (enum xlogue_stub): New enum.
>         (enum xlogue_stub_sets): New enum.
>         (class xlogue_layout): New class.
>         (struct ix86_frame): New fields stack_realign_allocate_offset,
>         stack_realign_offset and outlined_save_offset.  Modify comments to
>         detail stack layout when using out-of-line stubs.
>         (ix86_target_string): Add -mcall-ms2sysv-xlogues option.
>         (ix86_option_override_internal): Add sorry() for TARGET_SEH and
>         -mcall-ms2sysv-xlogues.
>         (stub_managed_regs): New static variable.
>         (ix86_save_reg): Add new parameter ignore_outlined to optionally
> omit
>         registers managed by out-of-line stub.
>         (disable_call_ms2sysv_xlogues): New function.
>         (ix86_compute_frame_layout): Modify re-alignment calculations,
> disable
>         m->call_ms2sysv when appropriate and compute frame layout for
>         out-of-line stubs.
>         (sp_valid_at, fp_valid_at): New inline functions.
>         (choose_basereg): New function.
>         (choose_baseaddr): Add align parameter, use choose_basereg and
>         modify all callers.
>         (ix86_emit_save_reg_using_mov, ix86_emit_restore_sse_regs_using_mov):
>         Use align parameter of choose_baseaddr to generate aligned SSE movs
>         when possible.
>         (pro_epilogue_adjust_stack): Modify to track
>         machine_frame_state::sp_realigned.
>         (ix86_nsaved_regs): Modify to accommodate changes to ix86_save_reg.
>         (ix86_nsaved_sseregs): Likewise.
>         (ix86_emit_save_regs): Likewise.
>         (ix86_emit_save_regs_using_mov): Likewise.
>         (ix86_emit_save_sse_regs_using_mov): Likewise.
>         (get_scratch_register_on_entry): Likewise.
>         (gen_frame_set): New function.
>         (gen_frame_load): Likewise.
>         (gen_frame_store): Likewise.
>         (emit_outlined_ms2sysv_save): Likewise.
>         (emit_outlined_ms2sysv_restore): Likewise.
>         (ix86_expand_prologue): Modify stack re-alignment code and call
>         emit_outlined_ms2sysv_save when appropriate.
>         (ix86_emit_leave): Clear machine_frame_state::sp_realigned.  Add
>         parameter rtx_insn *insn, which allows the function to be used to
>         only generate the notes.
>         (ix86_expand_epilogue): Modify validity checks of frame and stack
>         pointers, and call emit_outlined_ms2sysv_restore when appropriate.
>         (ix86_expand_call): Modify to enable m->call_ms2sysv when
>         appropriate.
>         * config/i386/predicates.md
>         (save_multiple): New predicate.
>         (restore_multiple): Likewise.
>         * config/i386/sse.md
>         (save_multiple<mode>): New pattern.
>         (save_multiple_realign<mode>): Likewise.
>         (restore_multiple<mode>): Likewise.
>         (restore_multiple_and_return<mode>): Likewise.
>         (restore_multiple_leave_return<mode>): Likewise.
>         * Makefile.in: Export HOSTCXX and HOSTCXXFLAGS to site.exp
>
> libgcc/ChangeLog:
>
> 2017-04-25  Daniel Santos<daniel.santos@pobox.com>
>
>         * config.host: Add i386/t-msabi to i386/t-linux file list.
>         * config/i386/i386-asm.h: New file.
>         * config/i386/resms64.S: New file.
>         * config/i386/resms64f.S: New file.
>         * config/i386/resms64fx.S: New file.
>         * config/i386/resms64x.S: New file.
>         * config/i386/savms64.S: New file.
>         * config/i386/savms64f.S: New file.
>         * config/i386/t-msabi: New file.
>
> gcc/testsuite/ChangeLog:
>
> 2017-04-25  Daniel Santos<daniel.santos@pobox.com>
>
>         * gcc.target/x86_64/abi/ms-sysv/do-test.S: New file.
>         * gcc.target/x86_64/abi/ms-sysv/gen.cc: Likewise.
>         * gcc.target/x86_64/abi/ms-sysv/ms-sysv.c: Likewise.
>         * gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp: Likewise.
>
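
The first bullet in the quoted summary above mentions a primitive file-based
parallelization scheme for the new tests but does not spell it out.  A generic
sketch of one way such a scheme can work (hypothetical; the actual ms-sysv.exp
harness may differ) is for each of the -j<jobs> processes to claim a work unit
by exclusively creating a marker file:

/* Hypothetical sketch of file-based work claiming between parallel jobs;
   not taken from the actual test harness.  A unit belongs to whichever
   process first creates its marker file with O_EXCL.  */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int
claim_unit (const char *dir, int unit)
{
  char path[256];
  snprintf (path, sizeof path, "%s/unit-%d.claimed", dir, unit);
  int fd = open (path, O_CREAT | O_EXCL | O_WRONLY, 0644);
  if (fd < 0)
    return 0;   /* another job already claimed this unit; skip it */
  close (fd);
  return 1;     /* this job owns the unit and should run it */
}

int
main (void)
{
  for (int unit = 0; unit < 8; unit++)
    if (claim_unit (".", unit))
      printf ("running unit %d\n", unit);
  return 0;
}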

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-01 11:31 ` [PATCH v4 0/12] " Uros Bizjak
@ 2017-05-02 10:25   ` JonY
  2017-05-02 10:45     ` Kai Tietz
  2017-05-03  4:32     ` Daniel Santos
  0 siblings, 2 replies; 41+ messages in thread
From: JonY @ 2017-05-02 10:25 UTC (permalink / raw)
  To: Uros Bizjak, Daniel Santos; +Cc: gcc-patches, Jan Hubicka


[-- Attachment #1.1: Type: text/plain, Size: 1648 bytes --]

On 05/01/2017 11:31 AM, Uros Bizjak wrote:
> On Thu, Apr 27, 2017 at 10:04 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> All of patches are concerned with 64-bit Microsoft ABI functions that call
>> System V ABI function which clobbers RSI, RDI and XMM6-15 and are aimed at
>> improving performance and .text size of Wine 64. I had previously submitted
>> these as separate patch sets, but have combined them for simplicity. (Does
>> this make the ChangeLogs too big? Please let me know if you want me to break
>> these back apart.) Below are the included patchsets and a summary of changes
>> since the previous post(s):
> 
> Well, the ChangeLog is acceptable.
> 
> I have comments on how new RTX patterns are generated and checked
> (patches 9/12 and 11/12). Other patches look good to me, so after
> issues with 9/12 and 11/12 are resolved, I think the patch set is
> ready to go.
> 
> After the above issue is addressed, I propose to move forward by
> committing the patchset, and resolve any possible issues later. There
> are just too many code paths in the stack frame construction and
> teardown to notice all possible interactions between new and old code.
> It looks that existing code won't be affected without activating new
> option, so we can be a bit less cautious with the patchset. An
> important part is thus a comprehensive added test suite, which seems
> to pass.
> 
> I also assume that Cygwin and MinGW people agree with the patch and
> the functionality itself.
> 
> Uros.
> 

Cygwin and MinGW do not use SysV/MS transitions directly in their own
code, so the changes should be OK.
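
For context, a minimal sketch of the kind of SysV/MS transition being
discussed (the function names are invented for illustration; which stub gets
used depends on the register count and frame shape):

/* Under the ms_abi convention RSI, RDI and XMM6-XMM15 are call-saved, but a
   SysV callee may clobber them, so the ms_abi caller's pro/epilogue must
   save and restore them.  With -mcall-ms2sysv-xlogues that save/restore is
   emitted as calls/jumps into the libgcc stubs added by this series
   (savms64.S, resms64.S, etc.) instead of as inline moves.  */
int sysv_helper (int x) { return x * 2; }      /* default (System V) ABI */

__attribute__((ms_abi)) int
ms_abi_entry (int x)
{
  return sysv_helper (x) + 1;  /* triggers the RSI/RDI/XMM6-15 save/restore */
}

int main (void) { return ms_abi_entry (20) == 41 ? 0 : 1; }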




[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 858 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-02 10:25   ` JonY
@ 2017-05-02 10:45     ` Kai Tietz
  2017-05-03  6:01       ` Daniel Santos
  2017-05-05  9:05       ` Daniel Santos
  2017-05-03  4:32     ` Daniel Santos
  1 sibling, 2 replies; 41+ messages in thread
From: Kai Tietz @ 2017-05-02 10:45 UTC (permalink / raw)
  To: JonY; +Cc: Uros Bizjak, Daniel Santos, gcc-patches, Jan Hubicka

2017-05-02 12:21 GMT+02:00 JonY <10walls@gmail.com>:
> On 05/01/2017 11:31 AM, Uros Bizjak wrote:
>> On Thu, Apr 27, 2017 at 10:04 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>>> All of patches are concerned with 64-bit Microsoft ABI functions that call
>>> System V ABI function which clobbers RSI, RDI and XMM6-15 and are aimed at
>>> improving performance and .text size of Wine 64. I had previously submitted
>>> these as separate patch sets, but have combined them for simplicity. (Does
>>> this make the ChangeLogs too big? Please let me know if you want me to break
>>> these back apart.) Below are the included patchsets and a summary of changes
>>> since the previous post(s):
>>
>> Well, the ChangeLog is acceptable.
>>
>> I have comments on how new RTX patterns are generated and checked
>> (patches 9/12 and 11/12). Other patches look good to me, so after
>> issues with 9/12 and 11/12 are resolved, I think the patch set is
>> ready to go.
>>
>> After the above issue is addressed, I propose to move forward by
>> committing the patchset, and resolve any possible issues later. There
>> are just too many code paths in the stack frame construction and
>> teardown to notice all possible interactions between new and old code.
>> It looks that existing code won't be affected without activating new
>> option, so we can be a bit less cautious with the patchset. An
>> important part is thus a comprehensive added test suite, which seems
>> to pass.
>>
>> I also assume that Cygwin and MinGW people agree with the patch and
>> the functionality itself.
>>
>> Uros.
>>
>
> Cygwin and MinGW does not use SysV/MS transitions directly in their own
> code, changes should be OK.
>
>
>

Right, and the Wine people will tell us if something doesn't work for them.
So ok for me too.

Kai

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-05-01 11:18   ` Uros Bizjak
@ 2017-05-02 22:19     ` Daniel Santos
  2017-05-03  6:17       ` Uros Bizjak
  0 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-02 22:19 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka

Thank you for the review.

On 05/01/2017 06:18 AM, Uros Bizjak wrote:
> On Thu, Apr 27, 2017 at 10:09 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> Adds the predicates save_multiple and restore_multiple to predicates.md,
>> which are used by following patterns in sse.md:
>>
>> * save_multiple - insn that calls a save stub
>> * restore_multiple - call_insn that calls a save stub and returns to the
>>    function to allow a sibling call (which should typically offer better
>>    optimization than the restore stub as the tail call)
>> * restore_multiple_and_return - a jump_insn that returns from the
>>    function as a tail-call.
>> * restore_multiple_leave_return - like the above, but restores the frame
>>    pointer before returning.
>>
>> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
>> ---
>>   gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
>>   gcc/config/i386/sse.md        |  37 ++++++++++
>>   2 files changed, 192 insertions(+)
>>
>> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
>> index 8f250a2e720..36fe8abc3f4 100644
>> --- a/gcc/config/i386/predicates.md
>> +++ b/gcc/config/i386/predicates.md
>> @@ -1657,3 +1657,158 @@
>>     (ior (match_operand 0 "register_operand")
>>          (and (match_code "const_int")
>>              (match_test "op == constm1_rtx"))))
>> +
>> +;; Return true if:
>> +;; 1. first op is a symbol reference,
>> +;; 2. >= 13 operands, and
>> +;; 3. operands 2 to end is one of:
>> +;;   a. save a register to a memory location, or
>> +;;   b. restore stack pointer.
>> +(define_predicate "save_multiple"
>> +  (match_code "parallel")
>> +{
>> +  const unsigned nregs = XVECLEN (op, 0);
>> +  rtx head = XVECEXP (op, 0, 0);
>> +  unsigned i;
>> +
>> +  if (GET_CODE (head) != USE)
>> +    return false;
>> +  else
>> +    {
>> +      rtx op0 = XEXP (head, 0);
>> +      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
>> +       return false;
>> +    }
>> +
>> +  if (nregs < 13)
>> +    return false;
>> +
>> +  for (i = 2; i < nregs; i++)
>> +    {
>> +      rtx e, src, dest;
>> +
>> +      e = XVECEXP (op, 0, i);
>> +
>> +      switch (GET_CODE (e))
>> +       {
>> +         case SET:
>> +           src  = SET_SRC (e);
>> +           dest = SET_DEST (e);
>> +
>> +           /* storing a register to memory.  */
>> +           if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
> Please use REG_P (...) and MEM_P (...) - and possible others -
> predicates in the code.
>
>> +             {
>> +               rtx addr = XEXP (dest, 0);
>> +
>> +               /* Good if dest address is in RAX.  */
>> +               if (GET_CODE (addr) == REG
>> +                   && REGNO (addr) == AX_REG)
>> +                 continue;
>> +
>> +               /* Good if dest address is offset of RAX.  */
>> +               if (GET_CODE (addr) == PLUS
>> +                   && GET_CODE (XEXP (addr, 0)) == REG
>> +                   && REGNO (XEXP (addr, 0)) == AX_REG)
>> +                 continue;
>> +             }
>> +           break;
>> +
>> +         default:
>> +           break;
>> +       }
>> +       return false;
>> +    }
>> +  return true;
>> +})
>> +
>> +;; Return true if:
>> +;; * first op is (return) or a a use (symbol reference),
>> +;; * >= 14 operands, and
>> +;; * operands 2 to end are one of:
>> +;;   - restoring a register from a memory location that's an offset of RSI.
>> +;;   - clobbering a reg
>> +;;   - adjusting SP
>> +(define_predicate "restore_multiple"
>> +  (match_code "parallel")
>> +{
>> +  const unsigned nregs = XVECLEN (op, 0);
>> +  rtx head = XVECEXP (op, 0, 0);
>> +  unsigned i;
>> +
>> +  switch (GET_CODE (head))
>> +    {
>> +      case RETURN:
>> +       i = 3;
>> +       break;
>> +
>> +      case USE:
>> +      {
>> +       rtx op0 = XEXP (head, 0);
>> +
>> +       if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
>> +         return false;
>> +
>> +       i = 1;
>> +       break;
>> +      }
>> +
>> +      default:
>> +       return false;
>> +    }
>> +
>> +  if (nregs < i + 12)
>> +    return false;
>> +
>> +  for (; i < nregs; i++)
>> +    {
>> +      rtx e, src, dest;
>> +
>> +      e = XVECEXP (op, 0, i);
>> +
>> +      switch (GET_CODE (e))
>> +       {
>> +         case CLOBBER:
>> +           continue;
> I don't see where CLOBBER is genreated in ix86_emit_outlined_ms2sysv_restore.

I think this is clutter that I didn't remove after changing the stubs.

>> +
>> +         case SET:
>> +           src  = SET_SRC (e);
>> +           dest = SET_DEST (e);
>> +
>> +           /* Restoring a register from memory.  */
>> +           if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
>> +             {
>> +               rtx addr = XEXP (src, 0);
>> +
>> +               /* Good if src address is in RSI.  */
>> +               if (GET_CODE (addr) == REG
>> +                   && REGNO (addr) == SI_REG)
>> +                 continue;
>> +
>> +               /* Good if src address is offset of RSI.  */
>> +               if (GET_CODE (addr) == PLUS
>> +                   && GET_CODE (XEXP (addr, 0)) == REG
>> +                   && REGNO (XEXP (addr, 0)) == SI_REG)
>> +                 continue;
>> +
>> +               /* Good if adjusting stack pointer.  */
>> +               if (GET_CODE (dest) == REG
>> +                   && REGNO (dest) == SP_REG
>> +                   && GET_CODE (src) == PLUS
>> +                   && GET_CODE (XEXP (src, 0)) == REG
>> +                   && REGNO (XEXP (src, 0)) == SP_REG)
>> +                 continue;
>> +             }
>> +
>> +           /* Restoring stack pointer from another register.  */
>> +           if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
>> +               && GET_CODE (src) == REG)
>> +             continue;
>> +           break;
>> +
>> +         default:
>> +           break;
>> +       }
>> +       return false;
>> +    }
>> +  return true;
>> +})
> I think that the above functions should check only if the function is
> storing/restoring correct registers, all other RTXes should be
> explicitly written in the insn patterns.
>
>> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
>> index e8ccb1e10c3..c9fe7274def 100644
>> --- a/gcc/config/i386/sse.md
>> +++ b/gcc/config/i386/sse.md
>> @@ -19997,3 +19997,40 @@
>>             (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
>>     "TARGET_AVX512VPOPCNTDQ"
>>     "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
>> +
>> +;; Save multiple registers out-of-line.
>> +(define_insn "save_multiple<mode>"
>> +  [(match_parallel 0 "save_multiple"
>> +    [(use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 0)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "call\t%P1")
> You probably don't need a (const_int 0) tag. According to the
> documentation, RTX matching guarantees subexpression match, so in the
> predicate you should check only stores of registers (as suggested
> above).
>
>> +;; Restore multiple registers out-of-line.
>> +(define_insn "restore_multiple<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(use (match_operand:P 1 "symbol_operand"))])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "call\t%P1")
>> +
>> +;; Restore multiple registers out-of-line and return.
>> +(define_insn "restore_multiple_and_return<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(return)
>> +     (use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 0)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "jmp\t%P1")
> No need for (const_int 0) tag.
>
>> +;; Restore multiple registers out-of-line when hard frame pointer is used,
>> +;; perform the leave operation prior to returning (from the function).
>> +(define_insn "restore_multiple_leave_return<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(return)
>> +     (use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 1)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "jmp\t%P1")
> You will have to write out all  sub-RTXes of the "leave" pattern,
> including clobber.
>
> I'd recommend that in the predicate, you check match_parallel from the
> bottom up, since subexpressions on the top are already matched, and
> you can have different number of subexpressions at the top.
>
> Uros.

OK, this all makes sense then. I was using const_int tags to 
differentiate the insns, but omitting the insns for the leave (for 
example) is what makes them ambiguous in the first place.

So one question I never had resolved is whether or not the order of the
insns in a parallel matters.  From your suggestions, I'm guessing that
it would be perfectly OK for the leave insns to follow the use of the
symbol and then have the register restores after that, even though that
wouldn't make sense at the assembly level because we would be writing
beyond the stack pointer.  Am I reading this correctly?  If so, then
putting the register save/restore matching in the predicate (checking
top down) and having all the others in the pattern makes great sense and
should both simplify it and make it clearer.

Also, I'm wondering if there's anything wrong with calling 
ix86_gen_leave () and plucking the insns out of the generated parallel 
insn and moving that into my own parallel rather than generating them in 
my own function.  I guess all that matters is what is cleanest.

Thanks,
Daniel
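
For reference, a rough sketch of the "plucking" idea described above; this is
purely illustrative and untested, not taken from any posted patch, and `v' and
`vi' stand for the rtvec and index used to build the new PARALLEL:

/* Copy the subexpressions of the PARALLEL built by ix86_gen_leave ()
   into the epilogue pattern being assembled, instead of writing the
   leave subexpressions out by hand.  */
rtx leave = ix86_gen_leave ();
gcc_assert (GET_CODE (leave) == PARALLEL);
for (int j = 0; j < XVECLEN (leave, 0); j++)
  RTVEC_ELT (v, vi++) = copy_rtx (XVECEXP (leave, 0, j));

(As the follow-up below notes, hand-written subexpressions were preferred in
the end.)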


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-02 10:25   ` JonY
  2017-05-02 10:45     ` Kai Tietz
@ 2017-05-03  4:32     ` Daniel Santos
  1 sibling, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-05-03  4:32 UTC (permalink / raw)
  To: JonY, Uros Bizjak; +Cc: gcc-patches, Jan Hubicka, Kai Tietz

On 05/02/2017 05:21 AM, JonY wrote:
> On 05/01/2017 11:31 AM, Uros Bizjak wrote:
>
>> I also assume that Cygwin and MinGW people agree with the patch and
>> the functionality itself.
>>
>> Uros.
>>
> Cygwin and MinGW does not use SysV/MS transitions directly in their own
> code, changes should be OK.

And to be clear, this did initially have a failed gcc test on Cygwin due 
to the aligned SSE MOVs portion of the patch set (the first three 
patches), which is resolved by disabling that feature on SEH targets.  
That is done by the last two lines in the chunk from 3/12 below:

> @@ -14080,11 +14102,19 @@ ix86_expand_prologue (void)
>   					GEN_INT (-align_bytes)));
>   
>         /* For the purposes of register save area addressing, the stack
> -         pointer is no longer valid.  As for the value of sp_offset,
> -	 see ix86_compute_frame_layout, which we need to match in order
> -	 to pass verification of stack_pointer_offset at the end.  */
> +	 pointer can no longer be used to access anything in the frame
> +	 below m->fs.sp_realigned_offset and the frame pointer cannot be
> +	 used for anything at or above.  */
>         m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
> -      m->fs.sp_valid = false;
> +      m->fs.sp_realigned = true;
> +      m->fs.sp_realigned_offset = m->fs.sp_offset - frame.nsseregs * 16;
> +      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
> +      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
> +	 is needed to describe where a register is saved using a realigned
> +	 stack pointer, so we need to invalidate the stack pointer for that
> +	 target.  */
> +      if (TARGET_SEH)
> +	m->fs.sp_valid = false;
>       }
>   
>     allocate = frame.stack_pointer_offset - m->fs.sp_offset;

Still, I have run complete tests on Cygwin, both 32- and 64-bit, using 
both the Cygwin and MinGW compilers.

Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-02 10:45     ` Kai Tietz
@ 2017-05-03  6:01       ` Daniel Santos
  2017-05-05  9:05       ` Daniel Santos
  1 sibling, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-05-03  6:01 UTC (permalink / raw)
  To: Kai Tietz, JonY; +Cc: Uros Bizjak, gcc-patches, Jan Hubicka

On 05/02/2017 05:40 AM, Kai Tietz wrote:
> Right, and Wine people will tell, if something doesn't work for them.
> So ok for me too.
>
> Kai

Yes, and although I haven't repeated the Wine tests in a few months, 
little has changed since my last run.  I'll be running them again soon 
anyway.

Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-05-02 22:19     ` Daniel Santos
@ 2017-05-03  6:17       ` Uros Bizjak
  2017-05-03  7:38         ` Daniel Santos
  0 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-05-03  6:17 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches, Jan Hubicka

On Wed, May 3, 2017 at 12:16 AM, Daniel Santos <daniel.santos@pobox.com> wrote:

>> I'd recommend that in the predicate, you check match_parallel from the
>> bottom up, since subexpressions on the top are already matched, and
>> you can have different number of subexpressions at the top.
>>
>> Uros.
>
>
> OK, this all makes sense then. I was using const_int tags to differentiate
> the insns, but omitting the insns for the leave (for example) are what
> ambiguates them in the first place.
>
> So one question I never had resolved is rather or not the order the insns in
> a parallel matters.  From your suggestions, I'm guessing that it would be
> perfectly OK for the leave insns to follow use symbol and then have the
> register restores after that, even though that wouldn't make sense from an
> assembly level because we would be writing beyond the stack pointer.  Am I
> reading this correctly?  If so, then putting the register save/restore
> matching in the predicate (checking top down) and having all others in the
> pattern make great sense and should both simplify it and make it more clear.

The order of subexpressions of parallel in general does not matter.

> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave ()
> and plucking the insns out of the generated parallel insn and moving that
> into my own parallel rather than generating them in my own function.  I
> guess all the matters is what is cleanest.

Hm... I'd rather see subexpressions generated "by hand".

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-05-03  6:17       ` Uros Bizjak
@ 2017-05-03  7:38         ` Daniel Santos
  2017-05-03  8:38           ` Uros Bizjak
  0 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-03  7:38 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka

On 05/03/2017 01:10 AM, Uros Bizjak wrote:
> The order of subexpressions of parallel in general does not matter.

Thanks, this makes things much clearer.

>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave ()
>> and plucking the insns out of the generated parallel insn and moving that
>> into my own parallel rather than generating them in my own function.  I
>> guess all the matters is what is cleanest.
> Hm... I'd rather see subexpressions generated "by hand".

OK.  While we're on the topic, are you OK with my changes to 
ix86_emit_leave to generate the notes or would you prefer those by hand 
as well?

Also, are these predicates what you had in mind?  (I haven't actually 
tested them just yet.)

(define_predicate "save_multiple"
   (match_code "parallel")
{
   const unsigned len = XVECLEN (op, 0);
   unsigned i;

   /* Starting from end of vector, count register saves.  */
   for (i = 0; i < len; ++i)
     {
       rtx src, dest, addr;
       rtx e = XVECEXP (op, 0, len - 1 - i);

       if (GET_CODE (e) != SET)
         break;

       src  = SET_SRC (e);
       dest = SET_DEST (e);

       if (!REG_P (src) || !MEM_P (dest))
         break;

       addr = XEXP (dest, 0);

       /* Good if dest address is in RAX.  */
       if (REG_P (addr) && REGNO (addr) == AX_REG)
         continue;

       /* Good if dest address is offset of RAX.  */
       if (GET_CODE (addr) == PLUS
           && REG_P (XEXP (addr, 0))
           && REGNO (XEXP (addr, 0)) == AX_REG)
         continue;

       break;
     }
   return (i >= 12 && i <= 18);
})


(define_predicate "restore_multiple"
   (match_code "parallel")
{
   const unsigned len = XVECLEN (op, 0);
   unsigned i;

   /* Starting from end of vector, count register restores.  */
   for (i = 0; i < len; ++i)
     {
       rtx src, dest, addr;
       rtx e = XVECEXP (op, 0, len - 1 - i);

       if (GET_CODE (e) != SET)
         break;

       src  = SET_SRC (e);
       dest = SET_DEST (e);

       if (!MEM_P (src) || !REG_P (dest))
         break;

       addr = XEXP (src, 0);

       /* Good if src address is in RSI.  */
       if (REG_P (addr) && REGNO (addr) == SI_REG)
         continue;

       /* Good if src address is offset of RSI.  */
       if (GET_CODE (addr) == PLUS
           && REG_P (XEXP (addr, 0))
           && REGNO (XEXP (addr, 0)) == SI_REG)
         continue;

       break;
     }
   return (i >= 12 && i <= 18);
})


Thanks,
Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
  2017-05-03  7:38         ` Daniel Santos
@ 2017-05-03  8:38           ` Uros Bizjak
  0 siblings, 0 replies; 41+ messages in thread
From: Uros Bizjak @ 2017-05-03  8:38 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches, Jan Hubicka

On Wed, May 3, 2017 at 9:38 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> On 05/03/2017 01:10 AM, Uros Bizjak wrote:
>>
>> The order of subexpressions of parallel in general does not matter.
>
>
> Thanks, this makes things much clearer.
>
>>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave
>>> ()
>>> and plucking the insns out of the generated parallel insn and moving that
>>> into my own parallel rather than generating them in my own function.  I
>>> guess all the matters is what is cleanest.
>>
>> Hm... I'd rather see subexpressions generated "by hand".
>
>
> OK.  While we're on the topic, are you OK with my changes to ix86_emit_leave
> to generate the notes or would you prefer those by hand as well?

I think they are OK. We are effectively emitting a leave here.

> Also, are these predicates what you had in mind?  (I haven't actually tested
> them just yet.)

Yes, these look good to me.

Uros.

> (define_predicate "save_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register saves.  */
>   for (i = 0; i < len; ++i)
>     {
>       rtx src, dest, addr;
>       rtx e = XVECEXP (op, 0, len - 1 - i);
>
>       if (GET_CODE (e) != SET)
>         break;
>
>       src  = SET_SRC (e);
>       dest = SET_DEST (e);
>
>       if (!REG_P (src) || !MEM_P (dest))
>         break;
>
>       addr = XEXP (dest, 0);
>
>       /* Good if dest address is in RAX.  */
>       if (REG_P (addr) && REGNO (addr) == AX_REG)
>         continue;
>
>       /* Good if dest address is offset of RAX.  */
>       if (GET_CODE (addr) == PLUS
>           && REG_P (XEXP (addr, 0))
>           && REGNO (XEXP (addr, 0)) == AX_REG)
>         continue;
>
>       break;
>     }
>   return (i >= 12 && i <= 18);
> })
>
>
> (define_predicate "restore_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register restores.  */
>   for (i = 0; i < len; ++i)
>     {
>       rtx src, dest, addr;
>       rtx e = XVECEXP (op, 0, len - 1 - i);
>
>       if (GET_CODE (e) != SET)
>         break;
>
>       src  = SET_SRC (e);
>       dest = SET_DEST (e);
>
>       if (!MEM_P (src) || !REG_P (dest))
>         break;
>
>       addr = XEXP (src, 0);
>
>       /* Good if src address is in RSI.  */
>       if (REG_P (addr) && REGNO (addr) == SI_REG)
>         continue;
>
>       /* Good if src address is offset of RSI.  */
>       if (GET_CODE (addr) == PLUS
>           && REG_P (XEXP (addr, 0))
>           && REGNO (XEXP (addr, 0)) == SI_REG)
>         continue;
>
>       break;
>     }
>   return (i >= 12 && i <= 18);
> })
>
>
> Thanks,
> Daniel
>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 09/12 rev1] [i386] Add patterns and predicates mcall-ms2sysv-xlogues
  2017-04-27  8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
  2017-05-01 11:18   ` Uros Bizjak
@ 2017-05-04 21:35   ` Daniel Santos
  1 sibling, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-05-04 21:35 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka

I've cleaned up the patterns and predicates as per your instructions, resulting
in 74 fewer lines of code.  Adding explicit insns to restore the stack pointer
and to perform the "leave" (to the patterns restore_multiple_and_return and
restore_multiple_leave_return, respectively) disambiguates them just fine
without the const_int tag while correctly describing exactly what each pattern
does.

Thanks for your guidance.  I understand RTL much better now.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/predicates.md | 81 +++++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md        | 37 ++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8f250a2e720..e7371a41b16 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1657,3 +1657,84 @@
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
 	    (match_test "op == constm1_rtx"))))
+
+;; Return true if the vector ends with between 12 and 18 register saves using
+;; RAX as the base address.
+(define_predicate "save_multiple"
+  (match_code "parallel")
+{
+  const unsigned len = XVECLEN (op, 0);
+  unsigned i;
+
+  /* Starting from end of vector, count register saves.  */
+  for (i = 0; i < len; ++i)
+    {
+      rtx src, dest, addr;
+      rtx e = XVECEXP (op, 0, len - 1 - i);
+
+      if (GET_CODE (e) != SET)
+	break;
+
+      src  = SET_SRC (e);
+      dest = SET_DEST (e);
+
+      if (!REG_P (src) || !MEM_P (dest))
+	break;
+
+      addr = XEXP (dest, 0);
+
+      /* Good if dest address is in RAX.  */
+      if (REG_P (addr) && REGNO (addr) == AX_REG)
+	continue;
+
+      /* Good if dest address is offset of RAX.  */
+      if (GET_CODE (addr) == PLUS
+	  && REG_P (XEXP (addr, 0))
+	  && REGNO (XEXP (addr, 0)) == AX_REG)
+	continue;
+
+      break;
+    }
+  return (i >= 12 && i <= 18);
+})
+
+
+;; Return true if the vector ends with between 12 and 18 register loads using
+;; RSI as the base address.
+(define_predicate "restore_multiple"
+  (match_code "parallel")
+{
+  const unsigned len = XVECLEN (op, 0);
+  unsigned i;
+
+  /* Starting from end of vector, count register restores.  */
+  for (i = 0; i < len; ++i)
+    {
+      rtx src, dest, addr;
+      rtx e = XVECEXP (op, 0, len - 1 - i);
+
+      if (GET_CODE (e) != SET)
+	break;
+
+      src  = SET_SRC (e);
+      dest = SET_DEST (e);
+
+      if (!MEM_P (src) || !REG_P (dest))
+	break;
+
+      addr = XEXP (src, 0);
+
+      /* Good if src address is in RSI.  */
+      if (REG_P (addr) && REGNO (addr) == SI_REG)
+	continue;
+
+      /* Good if src address is offset of RSI.  */
+      if (GET_CODE (addr) == PLUS
+	  && REG_P (XEXP (addr, 0))
+	  && REGNO (XEXP (addr, 0)) == SI_REG)
+	continue;
+
+      break;
+    }
+  return (i >= 12 && i <= 18);
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 094404bc913..d488b25c254 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20010,3 +20010,40 @@
           (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512VPOPCNTDQ"
   "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+;; Save multiple registers out-of-line.
+(define_insn "save_multiple<mode>"
+  [(match_parallel 0 "save_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line.
+(define_insn "restore_multiple<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line and return.
+(define_insn "restore_multiple_and_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (set (reg:DI SP_REG) (reg:DI R10_REG))
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")
+
+;; Restore multiple registers out-of-line when hard frame pointer is used,
+;; perform the leave operation prior to returning (from the function).
+(define_insn "restore_multiple_leave_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
+     (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
+     (clobber (mem:BLK (scratch)))
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 11/12 rev1] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation
  2017-04-27  8:05 ` [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation Daniel Santos
@ 2017-05-04 22:11   ` Daniel Santos
  0 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-05-04 22:11 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka

Now generates RTL with appropriate stack restore and leave patterns.  Slightly
cleaned up code that calculates the number of vector elements for clarity.

Tests are good when rebased onto gcc-7_1_0-release as HEAD currently fails to
bootstrap.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 287 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 278 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f2772b2d10e..e43dc819f9a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14148,6 +14148,78 @@ ix86_elim_entry_set_got (rtx reg)
     }
 }
 
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx addr, mem;
+
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
+  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
+}
+
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+static void
+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  rtvec v = rtvec_alloc (ncregs + 1);
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, addr;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset () + m->fs.sp_offset;
+  HOST_WIDE_INT stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+  HOST_WIDE_INT stack_align_off_in = xlogue.get_stack_align_off_in ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert (stack_align_off_in == (m->fs.sp_offset & UNITS_PER_WORD));
+
+  /* Get the stub symbol.  */
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+						  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Setup RAX as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  addr = choose_baseaddr (rax_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  insn = emit_insn (gen_rtx_SET (rax, addr));
+
+  gcc_assert (stack_alloc_size >= xlogue.get_stack_space_used ());
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			     GEN_INT (-stack_alloc_size), -1,
+			     m->fs.cfa_reg == stack_pointer_rtx);
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
+			     r.regno);
+      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);;
+    }
+
+  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -14395,7 +14467,7 @@ ix86_expand_prologue (void)
 	 performing the actual alignment.  Otherwise we cannot guarantee
 	 that there's enough storage above the realignment point.  */
       allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
-      if (allocate)
+      if (allocate && !m->call_ms2sysv)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (-allocate), -1, false);
 
@@ -14403,7 +14475,6 @@ ix86_expand_prologue (void)
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
 					stack_pointer_rtx,
 					GEN_INT (-align_bytes)));
-
       /* For the purposes of register save area addressing, the stack
 	 pointer can no longer be used to access anything in the frame
 	 below m->fs.sp_realigned_offset and the frame pointer cannot be
@@ -14420,6 +14491,9 @@ ix86_expand_prologue (void)
 	m->fs.sp_valid = false;
     }
 
+  if (m->call_ms2sysv)
+    ix86_emit_outlined_ms2sysv_save (frame);
+
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
 
   if (flag_stack_usage_info)
@@ -14740,17 +14814,19 @@ ix86_emit_restore_regs_using_pop (void)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
-/* Emit code and notes for the LEAVE instruction.  */
+/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
+   omits the emit and only attaches the notes.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
 {
   struct machine_function *m = cfun->machine;
-  rtx_insn *insn = emit_insn (ix86_gen_leave ());
+  if (!insn)
+    insn = emit_insn (ix86_gen_leave ());
 
   ix86_add_queued_cfa_restore_notes (insn);
 
@@ -14844,6 +14920,164 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       }
 }
 
+static void
+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
+				  bool use_call, int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  rtvec v;
+  unsigned int elems_needed, align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
+  enum xlogue_stub stub;
+
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+  /* If using a realigned stack, we should never start with padding.  */
+  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
+
+  /* Setup RSI as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  tmp = choose_baseaddr (rsi_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  /* Get a symbol for the stub.  */
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE
+		    : XLOGUE_STUB_RESTORE_TAIL;
+  sym = xlogue.get_stub_rtx (stub);
+
+  elems_needed = ncregs;
+  if (use_call)
+    elems_needed += 1;
+  else
+    elems_needed += frame_pointer_needed ? 5 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* We call the epilogue stub when we need to pop incoming args or we are
+     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
+     epilogue stub and it is the tail-call.  */
+  if (use_call)
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, vi++) = ret_rtx;
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+      if (frame_pointer_needed)
+	{
+	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
+	  gcc_assert (m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
+	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
+	}
+      else
+	{
+	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
+	  gcc_assert (!m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+	  gcc_assert (m->fs.sp_valid);
+
+	  r10 = gen_rtx_REG (DImode, R10_REG);
+	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
+	  emit_insn (gen_rtx_SET (r10, tmp));
+
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
+	}
+    }
+
+  /* Generate frame load insns and restore notes.  */
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg, frame_load;
+
+      reg = gen_rtx_REG (mode, r.regno);
+      frame_load = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add last.  */
+      if (r.regno == SI_REG)
+	{
+	  gcc_assert (!rsi_frame_load);
+	  rsi_frame_load = frame_load;
+	  rsi_restore_offset = r.offset;
+	}
+      else
+	{
+	  RTVEC_ELT (v, vi++) = frame_load;
+	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
+	}
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
+  RTVEC_ELT (v, vi++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+			     rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+
+      /* At this point, R10 should point to frame.stack_realign_offset.  */
+      if (m->fs.cfa_reg == stack_pointer_rtx)
+	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
+      m->fs.sp_offset = frame.stack_realign_offset;
+    }
+
+  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+      insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+	ix86_emit_leave (insn);
+      else
+	{
+	  /* Need CFA adjust note.  */
+	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
+	}
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  /* If we're not doing a tail-call, we need to adjust the stack.  */
+  if (use_call && m->fs.sp_valid)
+    {
+      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				GEN_INT (dealloc), style,
+				m->fs.cfa_reg == stack_pointer_rtx);
+    }
+}
+
 /* Restore function stack, frame, and registers.  */
 
 void
@@ -14854,6 +15088,7 @@ ix86_expand_epilogue (int style)
   struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
+  bool restore_stub_is_tail = false;
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
@@ -14956,7 +15191,37 @@ ix86_expand_epilogue (int style)
     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
 					  style == 2);
 
-  if (restore_regs_via_mov)
+  if (m->call_ms2sysv)
+    {
+      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
+
+      /* We cannot use a tail-call for the stub if:
+	 1. We have to pop incoming args,
+	 2. We have additional int regs to restore, or
+	 3. A sibling call will be the tail-call, or
+	 4. We are emitting an eh_return_internal epilogue.
+
+	 TODO: Item 4 has not yet been tested!
+
+	 If any of the above are true, we will call the stub rather than
+	 jump to it.  */
+      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
+      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
+    }
+
+  /* If using out-of-line stub that is a tail-call, then...*/
+  if (m->call_ms2sysv && restore_stub_is_tail)
+    {
+      /* TODO: paranoid tests. (remove eventually)  */
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+      gcc_assert (!m->fs.fp_valid);
+      gcc_assert (!m->fs.realigned);
+      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
+      gcc_assert (!crtl->drap_reg);
+      gcc_assert (!frame.nregs);
+    }
+  else if (restore_regs_via_mov)
     {
       rtx t;
 
@@ -15087,7 +15352,7 @@ ix86_expand_epilogue (int style)
       else if (TARGET_USE_LEAVE
 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	ix86_emit_leave ();
+	ix86_emit_leave (NULL);
       else
         {
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -15198,7 +15463,7 @@ ix86_expand_epilogue (int style)
       else
 	emit_jump_insn (gen_simple_return_pop_internal (popc));
     }
-  else
+  else if (!m->call_ms2sysv || !restore_stub_is_tail)
     emit_jump_insn (gen_simple_return_internal ());
 
   /* Restore the state back to the state from the prologue,
@@ -28927,6 +29192,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 
 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
 	}
+
+      /* Set here, but it may get cleared later.  */
+      if (TARGET_CALL_MS2SYSV_XLOGUES)
+	cfun->machine->call_ms2sysv = true;
     }
 
   if (vec_len > 1)
-- 
2.11.0

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-02 10:45     ` Kai Tietz
  2017-05-03  6:01       ` Daniel Santos
@ 2017-05-05  9:05       ` Daniel Santos
  2017-05-06 20:41         ` Daniel Santos
  1 sibling, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-05  9:05 UTC (permalink / raw)
  To: gcc-patches

On 05/02/2017 05:40 AM, Kai Tietz wrote:
> Right, and Wine people will tell, if something doesn't work for them.
> So ok for me too.
>
> Kai
Well, I haven't re-run these tests in a few months, but I got 272 failed 
wine tests with gcc 7.1 and 234 with my patch set rebased onto 7.1.  So 
it looks like I'll be trying to diagnose these failures this weekend.

Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-05  9:05       ` Daniel Santos
@ 2017-05-06 20:41         ` Daniel Santos
  2017-05-08 20:07           ` Daniel Santos
  0 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-06 20:41 UTC (permalink / raw)
  To: gcc-patches

On 05/05/2017 03:56 AM, Daniel Santos wrote:
> On 05/02/2017 05:40 AM, Kai Tietz wrote:
>> Right, and Wine people will tell, if something doesn't work for them.
>> So ok for me too.
>>
>> Kai
> Well, I haven't re-run these tests in a few months, but I got 272 
> failed wine tests with gcc 7.1 and 234 with my patch set rebased onto 
> 7.1.  So it looks like I'll be trying to diagnose these failures this 
> weekend.

Those are bad numbers.  I had forgotten to filter out the testlist.o 
files.  Below are my most recent numbers running Wine 2.7:

gcc-5.4.0 CFLAGS="-march=native -O2 -g": 74
gcc-7.1.0 CFLAGS="-march=native -O2 -g": 74
gcc-7.1.0 CFLAGS="-march=nocona -mtune=generic -O2 -g": 79
gcc-7.1.0 CFLAGS="-march=native -O2 -g -mcall-ms2sysv-xlogues" (patched): 31

I'm building out a clean test environment on another machine to try to 
rule out clutter issues (and video driver issues) on my workstation.

Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-06 20:41         ` Daniel Santos
@ 2017-05-08 20:07           ` Daniel Santos
  0 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-05-08 20:07 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On 05/06/2017 03:22 PM, Daniel Santos wrote:
>
> gcc-5.4.0 CFLAGS="-march=native -O2 -g": 74
> gcc-7.1.0 CFLAGS="-march=native -O2 -g": 74
> gcc-7.1.0 CFLAGS="-march=nocona -mtune=generic -O2 -g": 79
> gcc-7.1.0 CFLAGS="-march=native -O2 -g -mcall-ms2sysv-xlogues" 
> (patched): 31
>
> I'm building out a clean test environment on another machine to try to 
> rule out clutter issues (and video driver issues) on my workstation.
>
> Daniel
>

I've re-run Wine's tests with a new clean VM environment and some 
changes to include more tests, with similar results:

Compiler                                         Failures
gcc-4.9.4:                                       39
gcc-7.1.0:                                       78
gcc-7.1.0-patched (with -mcall-ms2sysv-xlogues): 40


The first error not present in the gcc-4.9.4 tests that I examined 
looked like a run-of-the-mill race condition in Wine that just happened 
to not crash when built with 4.9.4.  So I'm going to guess that the 
disappearance of these failures with -mcall-ms2sysv-xlogues is just 
incidental.  I think we're in good condition with this patch set.

Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
                   ` (13 preceding siblings ...)
  2017-05-01 11:31 ` [PATCH v4 0/12] " Uros Bizjak
@ 2017-05-13  0:01 ` Daniel Santos
  2017-05-13 18:29   ` Uros Bizjak
  14 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-13  0:01 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

Ping?  I have posted revisions of the following patches in this set:

05/12 - https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01442.html
09/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00348.html
11/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00350.html

I have retested them on Linux x86-64, in addition to a Wine testsuite 
comparison resulting in fewer failed tests (31) than when using 
unpatched 7.1.0 (78) and 5.4.0 (78).  A cursory examination of the tests 
that fail with 7.1.0 but now pass suggests the failures are due to race 
conditions in Wine that are incidentally hidden by the patches.

Is there anything else needed before we can commit these?  They still 
rebase cleanly onto the HEAD, but I can repost as "v5" if you prefer.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-13  0:01 ` [PING] " Daniel Santos
@ 2017-05-13 18:29   ` Uros Bizjak
  2017-05-13 23:43     ` Daniel Santos
  0 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-05-13 18:29 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches

On Sat, May 13, 2017 at 1:01 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> Ping?  I have posted revisions of the following in patch set:
>
> 05/12 - https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01442.html
> 09/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00348.html
> 11/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00350.html
>
> I have retested them on Linux x86-64 in addition a Wine testsuite comparison
> resulting in fewer failed tests (31) than when using unpatched 7.1.0 (78)
> and 5.4.0 (78).  A cursory examination of the now working failures with
> 7.1.0 seemed to be to be due to race conditions in Wine that are
> incidentally hidden after the patches.
>
> Is there anything else needed before we can commit these?  They still rebase
> cleanly onto the HEAD, but I can repost as "v5" if you prefer.

Please go ahead and commit the patches.

However, please stay around to fix possible fallout. As said - you are
touching quite a complex part of the compiler ...

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-13 18:29   ` Uros Bizjak
@ 2017-05-13 23:43     ` Daniel Santos
  2017-05-14 10:25       ` Uros Bizjak
  0 siblings, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-05-13 23:43 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On 05/13/2017 11:52 AM, Uros Bizjak wrote:
> On Sat, May 13, 2017 at 1:01 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> Ping?  I have posted revisions of the following in patch set:
>>
>> 05/12 - https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01442.html
>> 09/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00348.html
>> 11/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00350.html
>>
>> I have retested them on Linux x86-64 in addition a Wine testsuite comparison
>> resulting in fewer failed tests (31) than when using unpatched 7.1.0 (78)
>> and 5.4.0 (78).  A cursory examination of the now working failures with
>> 7.1.0 seemed to be to be due to race conditions in Wine that are
>> incidentally hidden after the patches.
>>
>> Is there anything else needed before we can commit these?  They still rebase
>> cleanly onto the HEAD, but I can repost as "v5" if you prefer.
> Please go ahead and commit the patches.
>
> However, please stay around to fix possible fallout. As said - you are
> touching quite complex part of the compiler ...
>
> Thanks,
> Uros.

Thanks!  I'll definitely be around; I have a lot more that I'm working 
on with C generics/pseudo-templates (all middle-end stuff). I also want 
to examine more ways that SSE saves/restores can be omitted in these ms 
to sysv calls through static analysis and such.

Anyway, I don't yet have SVN write access; will you sponsor my request?

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-13 23:43     ` Daniel Santos
@ 2017-05-14 10:25       ` Uros Bizjak
  2017-07-26 19:03         ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-05-14 10:25 UTC (permalink / raw)
  To: Daniel Santos; +Cc: gcc-patches

On Sun, May 14, 2017 at 12:34 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> On 05/13/2017 11:52 AM, Uros Bizjak wrote:
>>
>> On Sat, May 13, 2017 at 1:01 AM, Daniel Santos <daniel.santos@pobox.com>
>> wrote:
>>>
>>> Ping?  I have posted revisions of the following in patch set:
>>>
>>> 05/12 - https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01442.html
>>> 09/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00348.html
>>> 11/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00350.html
>>>
>>> I have retested them on Linux x86-64 in addition a Wine testsuite
>>> comparison
>>> resulting in fewer failed tests (31) than when using unpatched 7.1.0 (78)
>>> and 5.4.0 (78).  A cursory examination of the now working failures with
>>> 7.1.0 seemed to be to be due to race conditions in Wine that are
>>> incidentally hidden after the patches.
>>>
>>> Is there anything else needed before we can commit these?  They still
>>> rebase
>>> cleanly onto the HEAD, but I can repost as "v5" if you prefer.
>>
>> Please go ahead and commit the patches.
>>
>> However, please stay around to fix possible fallout. As said - you are
>> touching quite complex part of the compiler ...
>>
>> Thanks,
>> Uros.
>
>
> Thanks!  I'll definitely be around, I have a lot more that I'm working on
> with C generics/pseudo-templates (all middle-end stuff). I also want to
> examine more ways that SSE saves/restores can be omitted in these ms to sysv
> calls through static analysis and such.
>
> Anyway, I don't yet have SVN write access, will you sponsor my request?

The patchset was committed to mainline SVN as r248029.

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls.
  2017-04-27  8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
@ 2017-05-17  9:52   ` Thomas Preudhomme
  0 siblings, 0 replies; 41+ messages in thread
From: Thomas Preudhomme @ 2017-05-17  9:52 UTC (permalink / raw)
  To: gcc-patches, daniel.santos

Hi Daniel,

On 27/04/17 09:09, Daniel Santos wrote:
> A comprehensive program for testing x86_64 ms_abi functions that call
> sysv_abi functions to help validate -mcall-ms2sysv-xlogues and use of
> aligned SSE MOVs after a (non-DRAP) realigned stack.
>
> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
> ---
>  gcc/Makefile.in                                    |   2 +
>  .../gcc.target/x86_64/abi/ms-sysv/do-test.S        | 163 +++++
>  gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc | 807 +++++++++++++++++++++
>  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.c        | 373 ++++++++++
>  .../gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp      | 178 +++++
>  5 files changed, 1523 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/gen.cc
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp
>

[SNIP]

> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp
> new file mode 100644
> index 00000000000..e317af9bd85
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/ms-sysv.exp
> @@ -0,0 +1,178 @@

[SNIP]

> +
> +# Exit immediately if this isn't a native x86_64 target.
> +if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
> +     || ![is-effective-target lp64] || ![isnative] } then {
> +    unsupported "$subdir"
> +    return
> +}

This reports these tests as UNSUPPORTED for non-x86_64 targets rather than
just not showing them.  The usual pattern, from what I could see, is to just
return (see gcc.target/arm/acle.exp).
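
Something like the following minimal sketch, which reuses the condition
already in ms-sysv.exp and only drops the "unsupported" call (untested):

    # Skip this directory silently on non-matching targets instead of
    # reporting every test as UNSUPPORTED.
    if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
         || ![is-effective-target lp64] || ![isnative] } then {
        return
    }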

Best regards,

Thomas

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-05-14 10:25       ` Uros Bizjak
@ 2017-07-26 19:03         ` H.J. Lu
  2017-07-27  0:36           ` Daniel Santos
  2017-07-28 13:51           ` Daniel Santos
  0 siblings, 2 replies; 41+ messages in thread
From: H.J. Lu @ 2017-07-26 19:03 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Daniel Santos, gcc-patches

On Sun, May 14, 2017 at 3:23 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, May 14, 2017 at 12:34 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> On 05/13/2017 11:52 AM, Uros Bizjak wrote:
>>>
>>> On Sat, May 13, 2017 at 1:01 AM, Daniel Santos <daniel.santos@pobox.com>
>>> wrote:
>>>>
>>>> Ping?  I have posted revisions of the following in patch set:
>>>>
>>>> 05/12 - https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01442.html
>>>> 09/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00348.html
>>>> 11/12 - https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00350.html
>>>>
>>>> I have retested them on Linux x86-64 in addition to a Wine testsuite
>>>> comparison
>>>> resulting in fewer failed tests (31) than when using unpatched 7.1.0 (78)
>>>> and 5.4.0 (78).  A cursory examination of the now working failures with
>>>> 7.1.0 seemed to be due to race conditions in Wine that are
>>>> incidentally hidden after the patches.
>>>>
>>>> Is there anything else needed before we can commit these?  They still
>>>> rebase
>>>> cleanly onto the HEAD, but I can repost as "v5" if you prefer.
>>>
>>> Please go ahead and commit the patches.
>>>
>>> However, please stay around to fix possible fallout. As said - you are
>>> touching quite complex part of the compiler ...
>>>
>>> Thanks,
>>> Uros.
>>
>>
>> Thanks!  I'll definitely be around, I have a lot more that I'm working on
>> with C generics/pseudo-templates (all middle-end stuff). I also want to
>> examine more ways that SSE saves/restores can be omitted in these ms to sysv
>> calls through static analysis and such.
>>
>> Anyway, I don't yet have SVN write access, will you sponsor my request?
>
> The patchset was committed to mainline SVN as r248029.
>
> Uros.

This patch caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81563


-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-07-26 19:03         ` H.J. Lu
@ 2017-07-27  0:36           ` Daniel Santos
  2017-07-28 13:51           ` Daniel Santos
  1 sibling, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-07-27  0:36 UTC (permalink / raw)
  To: H.J. Lu, Uros Bizjak; +Cc: gcc-patches

On 07/26/2017 02:03 PM, H.J. Lu wrote:
> This patch caused:
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81563

Yes, I discovered this flaw while working on PR 80969, but I hadn't yet
found an actual testcase where it caused a problem.  I'm about to submit
my patch set for review, so sorry I didn't get it committed sooner.  My
patch set further improves sp_valid_at and fp_valid_at, since it's
possible that the last offset the frame pointer can be used to access is
not equal to the realignment offset.  I'll try to get this out tonight
or tomorrow.
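
To illustrate what I mean, here is a rough, hypothetical sketch only --
the type, field and function names below are made up for illustration
and do not match the actual code in i386.c:

    /* Hypothetical illustration: after re-alignment, the frame pointer
       and the re-aligned stack pointer each cover only part of the
       frame, and the frame pointer's limit need not coincide with the
       re-alignment offset itself.  */
    #include <stdbool.h>

    struct frame_limits
    {
      bool realigned;       /* Stack was re-aligned in the prologue.  */
      long realign_offset;  /* Offset where the re-aligned region begins.  */
      long fp_last_offset;  /* Last offset still reachable from the frame
                               pointer; not necessarily == realign_offset.  */
    };

    static bool
    fp_usable_for_offset_p (const struct frame_limits *fl, long offset)
    {
      /* In this simplified model, offsets grow toward the re-aligned
         region, and the frame pointer stops being usable past its own
         limit, which may differ from realign_offset.  */
      return !fl->realigned || offset <= fl->fp_last_offset;
    }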

Thanks!
Daniel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-07-26 19:03         ` H.J. Lu
  2017-07-27  0:36           ` Daniel Santos
@ 2017-07-28 13:51           ` Daniel Santos
  2017-07-28 14:41             ` H.J. Lu
  1 sibling, 1 reply; 41+ messages in thread
From: Daniel Santos @ 2017-07-28 13:51 UTC (permalink / raw)
  To: H.J. Lu, Uros Bizjak; +Cc: gcc-patches

On 07/26/2017 02:03 PM, H.J. Lu wrote:
> This patch caused:
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81563

Hello.  I've rebased my patch set and I'm now retesting.  I'm afraid
that your changes are wrong because my sp_valid_at and fp_valid_at
functions are wrong -- these are supposed to be for the base offset and
not the CFA offset, sorry about that.  This means that the check in
choose_basereg (and thus choose_baseaddr) has been wrong as well.  I'm
retesting now.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-07-28 13:51           ` Daniel Santos
@ 2017-07-28 14:41             ` H.J. Lu
  2017-07-31 10:25               ` Daniel Santos
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-07-28 14:41 UTC (permalink / raw)
  To: Daniel Santos; +Cc: Uros Bizjak, gcc-patches

On Fri, Jul 28, 2017 at 6:57 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> On 07/26/2017 02:03 PM, H.J. Lu wrote:
>>
>> This patch caused:
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81563
>
>
> Hello.  I've rebased my patch set and I'm now retesting.  I'm afraid that
> your changes are wrong because my sp_valid_at and fp_valid_at functions
> are wrong -- these are supposed to be for the base offset and not the CFA
> offset, sorry about that.  This means that the check in choose_basereg (and
> thus choose_baseaddr) has been wrong as well.  I'm retesting now.

Please check your change with gcc.target/i386/pr81563.c.
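
For instance, assuming a finished build tree, one way to run just that
test from the gcc build directory would be something like:

    make check-gcc RUNTESTFLAGS="i386.exp=pr81563.c"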

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PING] [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues
  2017-07-28 14:41             ` H.J. Lu
@ 2017-07-31 10:25               ` Daniel Santos
  0 siblings, 0 replies; 41+ messages in thread
From: Daniel Santos @ 2017-07-31 10:25 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 07/28/2017 09:41 AM, H.J. Lu wrote:
> On Fri, Jul 28, 2017 at 6:57 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> On 07/26/2017 02:03 PM, H.J. Lu wrote:
>>> This patch caused:
>>>
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81563
>> Hello.  I've rebased my patch set and I'm now retesting.  I'm afraid that
>> your changes are wrong because my sp_valid_at and fp_valid_at functions
>> are wrong -- these are supposed to be for the base offset and not the CFA
>> offset, sorry about that.  This means that the check in choose_basereg (and
>> thus choose_baseaddr) has been wrong as well.  I'm retesting now.
> Please check your change with gcc.target/i386/pr81563.c.
>
> Thanks.

I'm still getting used to x86 stack math, and briefly I thought that my
understanding of the CFA was wrong and that I had messed up sp_valid_at
and fp_valid_at, but I was mistaken, so sorry for the false alarm.  My
rebased patches pass all tests, so it's OK.

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2017-07-31 10:25 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-27  8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
2017-04-27  8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
2017-05-17  9:52   ` Thomas Preudhomme
2017-04-27  8:05 ` [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation Daniel Santos
2017-05-04 22:11   ` [PATCH 11/12 rev1] " Daniel Santos
2017-04-27  8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
2017-05-01 11:18   ` Uros Bizjak
2017-05-02 22:19     ` Daniel Santos
2017-05-03  6:17       ` Uros Bizjak
2017-05-03  7:38         ` Daniel Santos
2017-05-03  8:38           ` Uros Bizjak
2017-05-04 21:35   ` [PATCH 09/12 rev1] [i386] Add patterns and predicates mcall-ms2sysv-xlogues Daniel Santos
2017-04-27  8:05 ` [PATCH 03/12] [i386] Use re-aligned stack pointer for aligned SSE movs Daniel Santos
2017-04-27  8:05 ` [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues Daniel Santos
2017-04-28  6:00   ` Sandra Loosemore
2017-04-28  7:37     ` [PATCH 05/12 rev 1] " Daniel Santos
2017-04-27  8:05 ` [PATCH 02/12] [i386] Keep stack pointer valid after after re-alignment Daniel Santos
2017-04-27  8:05 ` [PATCH 10/12] [i386] Add ms2sysv pro/epilogue stubs to libgcc Daniel Santos
2017-04-27  8:05 ` [PATCH 08/12] [i386] Modify ix86_compute_frame_layout for -mcall-ms2sysv-xlogues Daniel Santos
2017-04-27  8:05 ` [PATCH 01/12] [i386] Re-align stack frame prior to SSE saves Daniel Santos
2017-04-27  8:23 ` [PATCH 04/12] [i386] Minor refactoring Daniel Santos
2017-04-27  8:44 ` [PATCH 07/12] [i386] Modify ix86_save_reg to optionally omit stub-managed registers Daniel Santos
2017-04-27  8:51 ` [PATCH 06/12] [i386] Add class xlogue_layout and new fields to struct machine_function Daniel Santos
2017-04-27 18:32 ` [PATCH v4 0/12 GCC8] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
2017-05-01 11:31 ` [PATCH v4 0/12] " Uros Bizjak
2017-05-02 10:25   ` JonY
2017-05-02 10:45     ` Kai Tietz
2017-05-03  6:01       ` Daniel Santos
2017-05-05  9:05       ` Daniel Santos
2017-05-06 20:41         ` Daniel Santos
2017-05-08 20:07           ` Daniel Santos
2017-05-03  4:32     ` Daniel Santos
2017-05-13  0:01 ` [PING] " Daniel Santos
2017-05-13 18:29   ` Uros Bizjak
2017-05-13 23:43     ` Daniel Santos
2017-05-14 10:25       ` Uros Bizjak
2017-07-26 19:03         ` H.J. Lu
2017-07-27  0:36           ` Daniel Santos
2017-07-28 13:51           ` Daniel Santos
2017-07-28 14:41             ` H.J. Lu
2017-07-31 10:25               ` Daniel Santos
