public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
@ 2022-12-04 11:51 Iain Sandoe
  2022-12-04 20:20 ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Iain Sandoe @ 2022-12-04 11:51 UTC (permalink / raw)
  To: gcc-patches, ubizjak; +Cc: crazylht

This is almost entirely a Darwin-local patch, but there is one (repeated)
place where a general change is needed: making xmm_regs (ymm_regs / zmm_regs
in the other two copies) and x87_regs extern in the three copies of args.h
(this is consistent with the other saved vars).  These failures represent
most of the current testsuite noise on x86 Darwin.

tested on x86-64 Darwin and Linux.

OK for master?
Iain

-- >8 --

These tests have failed ever since their introduction, because they assume
that the assembler output is ELF and that the targeted ABI supports the
addressing they use.

For Darwin (Mach-O and its ABI) we need to make several changes:
1. Apply the __USER_LABEL_PREFIX__ (a leading underscore) to symbol names.
2. Remove the use of ELF-specific constructs (.size, .type etc.)
3. We cannot make direct access to common variables in the ABI, so we
   must move these to BSS.

Since that set is quite significant, I elected to make a separate source
section for Darwin.  This is introduced by #elif defined(__APPLE__) because
__MACH__ is also used by HURD.

There are potentially other X86 targets (e.g. XCOFF) that could have yet
more changes, so I added a catchall section that #errors if the object format
is neither ELF nor Mach-O.

Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>

gcc/testsuite/ChangeLog:

	* gcc.target/x86_64/abi/bf16/args.h:
	* gcc.target/x86_64/abi/bf16/asm-support.S:
	* gcc.target/x86_64/abi/bf16/m256bf16/args.h:
	* gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S:
	* gcc.target/x86_64/abi/bf16/m512bf16/args.h:
	* gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S:
---
 .../gcc.target/x86_64/abi/bf16/args.h         |   4 +-
 .../gcc.target/x86_64/abi/bf16/asm-support.S  | 105 +++++++++++++++
 .../x86_64/abi/bf16/m256bf16/args.h           |   4 +-
 .../x86_64/abi/bf16/m256bf16/asm-support.S    | 107 +++++++++++++++
 .../x86_64/abi/bf16/m512bf16/args.h           |   4 +-
 .../x86_64/abi/bf16/m512bf16/asm-support.S    | 123 ++++++++++++++++++
 6 files changed, 341 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
index 11d7e2b3a1c..95f9a394f2c 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
@@ -42,8 +42,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-XMM_T xmm_regs[16];
-X87_T x87_regs[8];
+extern XMM_T xmm_regs[16];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
index 7559aa910c4..331bf92d761 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
@@ -1,3 +1,5 @@
+
+#ifdef __ELF__
 	.text
 	.p2align 4,,15
 .globl snapshot
@@ -82,3 +84,106 @@ snapshot_ret:
 #ifdef __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
+
+#elif defined(__APPLE__)
+	.text
+	.p2align 4,,15
+	.globl _snapshot
+_snapshot:
+LFB3:
+	movq	%rax, _rax(%rip)
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	movdqu	%xmm0, _xmm_regs+0(%rip)
+	movdqu	%xmm1, _xmm_regs+16(%rip)
+	movdqu	%xmm2, _xmm_regs+32(%rip)
+	movdqu	%xmm3, _xmm_regs+48(%rip)
+	movdqu	%xmm4, _xmm_regs+64(%rip)
+	movdqu	%xmm5, _xmm_regs+80(%rip)
+	movdqu	%xmm6, _xmm_regs+96(%rip)
+	movdqu	%xmm7, _xmm_regs+112(%rip)
+	movdqu	%xmm8, _xmm_regs+128(%rip)
+	movdqu	%xmm9, _xmm_regs+144(%rip)
+	movdqu	%xmm10, _xmm_regs+160(%rip)
+	movdqu	%xmm11, _xmm_regs+176(%rip)
+	movdqu	%xmm12, _xmm_regs+192(%rip)
+	movdqu	%xmm13, _xmm_regs+208(%rip)
+	movdqu	%xmm14, _xmm_regs+224(%rip)
+	movdqu	%xmm15, _xmm_regs+240(%rip)
+	jmp	*_callthis(%rip)
+LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:
+	movq	%rdi, _rdi(%rip)
+	subq	$8, %rsp
+	call	*_callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	movdqu	%xmm0, _xmm_regs+0(%rip)
+	movdqu	%xmm1, _xmm_regs+16(%rip)
+	fstpt	_x87_regs(%rip)
+	fstpt	_x87_regs+16(%rip)
+	fldt	_x87_regs+16(%rip)
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis
+	.zerofill __DATA,__bss,_callthis,8,3
+	.globl	_rax
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_xmm_regs
+	.zerofill __DATA,__bss,_xmm_regs,256,5
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
+
+#else
+#error unknown object format
+#endif
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
index 94627ffbd44..1027742cbb2 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
@@ -50,8 +50,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-YMM_T ymm_regs[16];
-X87_T x87_regs[8];
+extern YMM_T ymm_regs[16];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
index 24c8b3c9023..171654aa4db 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
@@ -1,3 +1,5 @@
+
+#ifdef __ELF__
 	.text
 	.p2align 4,,15
 .globl snapshot
@@ -82,3 +84,108 @@ snapshot_ret:
 #ifdef __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
+
+#elif defined(__APPLE__)
+
+	.text
+	.p2align 4,,15
+	.globl _snapshot
+_snapshot:
+.LFB3:
+	movq	%rax, _rax(%rip)
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	vmovdqu	%ymm0, _ymm_regs+0(%rip)
+	vmovdqu	%ymm1, _ymm_regs+32(%rip)
+	vmovdqu	%ymm2, _ymm_regs+64(%rip)
+	vmovdqu	%ymm3, _ymm_regs+96(%rip)
+	vmovdqu	%ymm4, _ymm_regs+128(%rip)
+	vmovdqu	%ymm5, _ymm_regs+160(%rip)
+	vmovdqu	%ymm6, _ymm_regs+192(%rip)
+	vmovdqu	%ymm7, _ymm_regs+224(%rip)
+	vmovdqu	%ymm8, _ymm_regs+256(%rip)
+	vmovdqu	%ymm9, _ymm_regs+288(%rip)
+	vmovdqu	%ymm10, _ymm_regs+320(%rip)
+	vmovdqu	%ymm11, _ymm_regs+352(%rip)
+	vmovdqu	%ymm12, _ymm_regs+384(%rip)
+	vmovdqu	%ymm13, _ymm_regs+416(%rip)
+	vmovdqu	%ymm14, _ymm_regs+448(%rip)
+	vmovdqu	%ymm15, _ymm_regs+480(%rip)
+	jmp	*_callthis(%rip)
+.LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:
+	movq	%rdi, _rdi(%rip)
+	subq	$8, %rsp
+	call	*_callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	vmovdqu	%ymm0, _ymm_regs+0(%rip)
+	vmovdqu	%ymm1, _ymm_regs+32(%rip)
+	fstpt	_x87_regs(%rip)
+	fstpt	_x87_regs+16(%rip)
+	fldt	_x87_regs+16(%rip)
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis
+	.zerofill __DATA,__bss,_callthis,8,3
+	.globl	_rax
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_ymm_regs
+	.zerofill __DATA,__bss,_ymm_regs,512,5
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
+
+#else
+#error unknown object format
+#endif
+
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
index 64b24783833..f9710bae347 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
@@ -51,8 +51,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-ZMM_T zmm_regs[32];
-X87_T x87_regs[8];
+extern ZMM_T zmm_regs[32];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
index 86d54d11c58..9dc6d173a61 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
@@ -1,3 +1,5 @@
+
+#ifdef __ELF__
 	.text
 	.p2align 4,,15
 .globl snapshot
@@ -98,3 +100,124 @@ snapshot_ret:
 #ifdef __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
+
+#elif defined(__APPLE__)
+
+	.text
+	.p2align 4,,15
+	.globl _snapshot
+_snapshot:
+.LFB3:
+	movq	%rax, _rax(%rip)
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	vmovdqu32 %zmm0, _zmm_regs+0(%rip)
+	vmovdqu32 %zmm1, _zmm_regs+64(%rip)
+	vmovdqu32 %zmm2, _zmm_regs+128(%rip)
+	vmovdqu32 %zmm3, _zmm_regs+192(%rip)
+	vmovdqu32 %zmm4, _zmm_regs+256(%rip)
+	vmovdqu32 %zmm5, _zmm_regs+320(%rip)
+	vmovdqu32 %zmm6, _zmm_regs+384(%rip)
+	vmovdqu32 %zmm7, _zmm_regs+448(%rip)
+	vmovdqu32 %zmm8, _zmm_regs+512(%rip)
+	vmovdqu32 %zmm9, _zmm_regs+576(%rip)
+	vmovdqu32 %zmm10, _zmm_regs+640(%rip)
+	vmovdqu32 %zmm11, _zmm_regs+704(%rip)
+	vmovdqu32 %zmm12, _zmm_regs+768(%rip)
+	vmovdqu32 %zmm13, _zmm_regs+832(%rip)
+	vmovdqu32 %zmm14, _zmm_regs+896(%rip)
+	vmovdqu32 %zmm15, _zmm_regs+960(%rip)
+	vmovdqu32 %zmm16, _zmm_regs+1024(%rip)
+	vmovdqu32 %zmm17, _zmm_regs+1088(%rip)
+	vmovdqu32 %zmm18, _zmm_regs+1152(%rip)
+	vmovdqu32 %zmm19, _zmm_regs+1216(%rip)
+	vmovdqu32 %zmm20, _zmm_regs+1280(%rip)
+	vmovdqu32 %zmm21, _zmm_regs+1344(%rip)
+	vmovdqu32 %zmm22, _zmm_regs+1408(%rip)
+	vmovdqu32 %zmm23, _zmm_regs+1472(%rip)
+	vmovdqu32 %zmm24, _zmm_regs+1536(%rip)
+	vmovdqu32 %zmm25, _zmm_regs+1600(%rip)
+	vmovdqu32 %zmm26, _zmm_regs+1664(%rip)
+	vmovdqu32 %zmm27, _zmm_regs+1728(%rip)
+	vmovdqu32 %zmm28, _zmm_regs+1792(%rip)
+	vmovdqu32 %zmm29, _zmm_regs+1856(%rip)
+	vmovdqu32 %zmm30, _zmm_regs+1920(%rip)
+	vmovdqu32 %zmm31, _zmm_regs+1984(%rip)
+	jmp	*_callthis(%rip)
+.LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:
+	movq	%rdi, _rdi(%rip)
+	subq	$8, %rsp
+	call	*_callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	vmovdqu32	%zmm0, _zmm_regs+0(%rip)
+	vmovdqu32	%zmm1, _zmm_regs+64(%rip)
+	fstpt	_x87_regs(%rip)
+	fstpt	_x87_regs+16(%rip)
+	fldt	_x87_regs+16(%rip)
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis
+	.zerofill __DATA,__bss,_callthis,8,3
+	.globl	_rax
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_zmm_regs
+	.zerofill __DATA,__bss,_zmm_regs,2048,6
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
+
+#else
+#error unknown object format
+#endif
+
-- 
2.37.1 (Apple Git-137.1)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-04 11:51 [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI Iain Sandoe
@ 2022-12-04 20:20 ` Uros Bizjak
  2022-12-04 20:30   ` Iain Sandoe
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2022-12-04 20:20 UTC (permalink / raw)
  To: iain; +Cc: gcc-patches, crazylht

On Sun, Dec 4, 2022 at 12:51 PM Iain Sandoe <iains.gcc@gmail.com> wrote:
>
> This is almost a completely Darwin-local patch, but there is one (repeated)
> place where a general change is needed - which is in making xmm_regs and
> x87_regs extern in the three copies of args.h (this is consistent with the
> other saved vars).  These fails represent most of the current testsuite noise
> on x86 Darwin.
>
> tested on x86-64 Darwin and Linux.
>
> OK for master?
> Iain
>
> -- >8 --
>
> These tests have failed since introduction since they assume that the
> assembler output is ELF and that the ABI targeted supports the addressing.
>
> For Darwin, Mach-O and ABI we need to make several changes:
> 1. Use the __USER_LABEL__PREFIX__
> 2. Remove the use of ELF-specific constructs (.size, .type etc.)
> 3. We cannot make direct access to common variables in the ABI, so that we
>    must move these to BSS.
>
> Since that set is quite significant, I elected to make a separate source
> section for Darwin.  This is introduced by #elif defined(__APPLE__) because
> __MACH__ is also used by HURD.
>
> There are potentially other X86 targets (e.g. XCOFF) that could have yet
> more changes, so I added a catchall section that #errors if the object format
> is neither ELF or Mach-O.
>
> Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/x86_64/abi/bf16/args.h:
>         * gcc.target/x86_64/abi/bf16/asm-support.S:
>         * gcc.target/x86_64/abi/bf16/m256bf16/args.h:
>         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S:
>         * gcc.target/x86_64/abi/bf16/m512bf16/args.h:
>         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S:

Missing descriptions in ChangeLog entry.

Uros.

> ---
>  .../gcc.target/x86_64/abi/bf16/args.h         |   4 +-
>  .../gcc.target/x86_64/abi/bf16/asm-support.S  | 105 +++++++++++++++
>  .../x86_64/abi/bf16/m256bf16/args.h           |   4 +-
>  .../x86_64/abi/bf16/m256bf16/asm-support.S    | 107 +++++++++++++++
>  .../x86_64/abi/bf16/m512bf16/args.h           |   4 +-
>  .../x86_64/abi/bf16/m512bf16/asm-support.S    | 123 ++++++++++++++++++
>  6 files changed, 341 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
> index 11d7e2b3a1c..95f9a394f2c 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
> @@ -42,8 +42,8 @@ typedef union {
>  } X87_T;
>  extern void (*callthis)(void);
>  extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> -XMM_T xmm_regs[16];
> -X87_T x87_regs[8];
> +extern XMM_T xmm_regs[16];
> +extern X87_T x87_regs[8];
>  extern volatile unsigned long long volatile_var;
>  extern void snapshot (void);
>  extern void snapshot_ret (void);
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
> index 7559aa910c4..331bf92d761 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
> @@ -1,3 +1,5 @@
> +
> +#ifdef __ELF__
>         .text
>         .p2align 4,,15
>  .globl snapshot
> @@ -82,3 +84,106 @@ snapshot_ret:
>  #ifdef __linux__
>         .section        .note.GNU-stack,"",@progbits
>  #endif
> +
> +#elif defined(__APPLE__)
> +       .text
> +       .p2align 4,,15
> +       .globl _snapshot
> +_snapshot:
> +LFB3:
> +       movq    %rax, _rax(%rip)
> +       movq    %rbx, _rbx(%rip)
> +       movq    %rcx, _rcx(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       movq    %rdi, _rdi(%rip)
> +       movq    %rsi, _rsi(%rip)
> +       movq    %rbp, _rbp(%rip)
> +       movq    %rsp, _rsp(%rip)
> +       movq    %r8, _r8(%rip)
> +       movq    %r9, _r9(%rip)
> +       movq    %r10, _r10(%rip)
> +       movq    %r11, _r11(%rip)
> +       movq    %r12, _r12(%rip)
> +       movq    %r13, _r13(%rip)
> +       movq    %r14, _r14(%rip)
> +       movq    %r15, _r15(%rip)
> +       movdqu  %xmm0, _xmm_regs+0(%rip)
> +       movdqu  %xmm1, _xmm_regs+16(%rip)
> +       movdqu  %xmm2, _xmm_regs+32(%rip)
> +       movdqu  %xmm3, _xmm_regs+48(%rip)
> +       movdqu  %xmm4, _xmm_regs+64(%rip)
> +       movdqu  %xmm5, _xmm_regs+80(%rip)
> +       movdqu  %xmm6, _xmm_regs+96(%rip)
> +       movdqu  %xmm7, _xmm_regs+112(%rip)
> +       movdqu  %xmm8, _xmm_regs+128(%rip)
> +       movdqu  %xmm9, _xmm_regs+144(%rip)
> +       movdqu  %xmm10, _xmm_regs+160(%rip)
> +       movdqu  %xmm11, _xmm_regs+176(%rip)
> +       movdqu  %xmm12, _xmm_regs+192(%rip)
> +       movdqu  %xmm13, _xmm_regs+208(%rip)
> +       movdqu  %xmm14, _xmm_regs+224(%rip)
> +       movdqu  %xmm15, _xmm_regs+240(%rip)
> +       jmp     *_callthis(%rip)
> +LFE3:
> +
> +       .p2align 4,,15
> +       .globl _snapshot_ret
> +_snapshot_ret:
> +       movq    %rdi, _rdi(%rip)
> +       subq    $8, %rsp
> +       call    *_callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, _rax(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       movdqu  %xmm0, _xmm_regs+0(%rip)
> +       movdqu  %xmm1, _xmm_regs+16(%rip)
> +       fstpt   _x87_regs(%rip)
> +       fstpt   _x87_regs+16(%rip)
> +       fldt    _x87_regs+16(%rip)
> +       fldt    _x87_regs(%rip)
> +       ret
> +
> +       .globl  _callthis
> +       .zerofill __DATA,__bss,_callthis,8,3
> +       .globl  _rax
> +       .zerofill __DATA,__bss,_rax,8,3
> +       .globl  _rbx
> +       .zerofill __DATA,__bss,_rbx,8,3
> +       .globl  _rcx
> +       .zerofill __DATA,__bss,_rcx,8,3
> +       .globl  _rdx
> +       .zerofill __DATA,__bss,_rdx,8,3
> +       .globl  _rsi
> +       .zerofill __DATA,__bss,_rsi,8,3
> +       .globl  _rdi
> +       .zerofill __DATA,__bss,_rdi,8,3
> +       .globl  _rsp
> +       .zerofill __DATA,__bss,_rsp,8,3
> +       .globl  _rbp
> +       .zerofill __DATA,__bss,_rbp,8,3
> +       .globl  _r8
> +       .zerofill __DATA,__bss,_r8,8,3
> +       .globl  _r9
> +       .zerofill __DATA,__bss,_r9,8,3
> +       .globl  _r10
> +       .zerofill __DATA,__bss,_r10,8,3
> +       .globl  _r11
> +       .zerofill __DATA,__bss,_r11,8,3
> +       .globl  _r12
> +       .zerofill __DATA,__bss,_r12,8,3
> +       .globl  _r13
> +       .zerofill __DATA,__bss,_r13,8,3
> +       .globl  _r14
> +       .zerofill __DATA,__bss,_r14,8,3
> +       .globl  _r15
> +       .zerofill __DATA,__bss,_r15,8,3
> +       .globl  _xmm_regs
> +       .zerofill __DATA,__bss,_xmm_regs,256,5
> +       .globl  _x87_regs
> +       .zerofill __DATA,__bss,_x87_regs,128,5
> +       .globl  _volatile_var
> +       .zerofill __DATA,__bss,_volatile_var,8,3
> +
> +#else
> +#error unknown object format
> +#endif
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
> index 94627ffbd44..1027742cbb2 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
> @@ -50,8 +50,8 @@ typedef union {
>  } X87_T;
>  extern void (*callthis)(void);
>  extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> -YMM_T ymm_regs[16];
> -X87_T x87_regs[8];
> +extern YMM_T ymm_regs[16];
> +extern X87_T x87_regs[8];
>  extern volatile unsigned long long volatile_var;
>  extern void snapshot (void);
>  extern void snapshot_ret (void);
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
> index 24c8b3c9023..171654aa4db 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
> @@ -1,3 +1,5 @@
> +
> +#ifdef __ELF__
>         .text
>         .p2align 4,,15
>  .globl snapshot
> @@ -82,3 +84,108 @@ snapshot_ret:
>  #ifdef __linux__
>         .section        .note.GNU-stack,"",@progbits
>  #endif
> +
> +#elif defined(__APPLE__)
> +
> +       .text
> +       .p2align 4,,15
> +       .globl _snapshot
> +_snapshot:
> +.LFB3:
> +       movq    %rax, _rax(%rip)
> +       movq    %rbx, _rbx(%rip)
> +       movq    %rcx, _rcx(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       movq    %rdi, _rdi(%rip)
> +       movq    %rsi, _rsi(%rip)
> +       movq    %rbp, _rbp(%rip)
> +       movq    %rsp, _rsp(%rip)
> +       movq    %r8, _r8(%rip)
> +       movq    %r9, _r9(%rip)
> +       movq    %r10, _r10(%rip)
> +       movq    %r11, _r11(%rip)
> +       movq    %r12, _r12(%rip)
> +       movq    %r13, _r13(%rip)
> +       movq    %r14, _r14(%rip)
> +       movq    %r15, _r15(%rip)
> +       vmovdqu %ymm0, _ymm_regs+0(%rip)
> +       vmovdqu %ymm1, _ymm_regs+32(%rip)
> +       vmovdqu %ymm2, _ymm_regs+64(%rip)
> +       vmovdqu %ymm3, _ymm_regs+96(%rip)
> +       vmovdqu %ymm4, _ymm_regs+128(%rip)
> +       vmovdqu %ymm5, _ymm_regs+160(%rip)
> +       vmovdqu %ymm6, _ymm_regs+192(%rip)
> +       vmovdqu %ymm7, _ymm_regs+224(%rip)
> +       vmovdqu %ymm8, _ymm_regs+256(%rip)
> +       vmovdqu %ymm9, _ymm_regs+288(%rip)
> +       vmovdqu %ymm10, _ymm_regs+320(%rip)
> +       vmovdqu %ymm11, _ymm_regs+352(%rip)
> +       vmovdqu %ymm12, _ymm_regs+384(%rip)
> +       vmovdqu %ymm13, _ymm_regs+416(%rip)
> +       vmovdqu %ymm14, _ymm_regs+448(%rip)
> +       vmovdqu %ymm15, _ymm_regs+480(%rip)
> +       jmp     *_callthis(%rip)
> +.LFE3:
> +
> +       .p2align 4,,15
> +       .globl _snapshot_ret
> +_snapshot_ret:
> +       movq    %rdi, _rdi(%rip)
> +       subq    $8, %rsp
> +       call    *_callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, _rax(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       vmovdqu %ymm0, _ymm_regs+0(%rip)
> +       vmovdqu %ymm1, _ymm_regs+32(%rip)
> +       fstpt   _x87_regs(%rip)
> +       fstpt   _x87_regs+16(%rip)
> +       fldt    _x87_regs+16(%rip)
> +       fldt    _x87_regs(%rip)
> +       ret
> +
> +       .globl  _callthis
> +       .zerofill __DATA,__bss,_callthis,8,3
> +       .globl  _rax
> +       .zerofill __DATA,__bss,_rax,8,3
> +       .globl  _rbx
> +       .zerofill __DATA,__bss,_rbx,8,3
> +       .globl  _rcx
> +       .zerofill __DATA,__bss,_rcx,8,3
> +       .globl  _rdx
> +       .zerofill __DATA,__bss,_rdx,8,3
> +       .globl  _rsi
> +       .zerofill __DATA,__bss,_rsi,8,3
> +       .globl  _rdi
> +       .zerofill __DATA,__bss,_rdi,8,3
> +       .globl  _rsp
> +       .zerofill __DATA,__bss,_rsp,8,3
> +       .globl  _rbp
> +       .zerofill __DATA,__bss,_rbp,8,3
> +       .globl  _r8
> +       .zerofill __DATA,__bss,_r8,8,3
> +       .globl  _r9
> +       .zerofill __DATA,__bss,_r9,8,3
> +       .globl  _r10
> +       .zerofill __DATA,__bss,_r10,8,3
> +       .globl  _r11
> +       .zerofill __DATA,__bss,_r11,8,3
> +       .globl  _r12
> +       .zerofill __DATA,__bss,_r12,8,3
> +       .globl  _r13
> +       .zerofill __DATA,__bss,_r13,8,3
> +       .globl  _r14
> +       .zerofill __DATA,__bss,_r14,8,3
> +       .globl  _r15
> +       .zerofill __DATA,__bss,_r15,8,3
> +       .globl  _ymm_regs
> +       .zerofill __DATA,__bss,_ymm_regs,512,5
> +       .globl  _x87_regs
> +       .zerofill __DATA,__bss,_x87_regs,128,5
> +       .globl  _volatile_var
> +       .zerofill __DATA,__bss,_volatile_var,8,3
> +
> +#else
> +#error unknown object format
> +#endif
> +
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
> index 64b24783833..f9710bae347 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
> @@ -51,8 +51,8 @@ typedef union {
>  } X87_T;
>  extern void (*callthis)(void);
>  extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> -ZMM_T zmm_regs[32];
> -X87_T x87_regs[8];
> +extern ZMM_T zmm_regs[32];
> +extern X87_T x87_regs[8];
>  extern volatile unsigned long long volatile_var;
>  extern void snapshot (void);
>  extern void snapshot_ret (void);
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
> index 86d54d11c58..9dc6d173a61 100644
> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
> @@ -1,3 +1,5 @@
> +
> +#ifdef __ELF__
>         .text
>         .p2align 4,,15
>  .globl snapshot
> @@ -98,3 +100,124 @@ snapshot_ret:
>  #ifdef __linux__
>         .section        .note.GNU-stack,"",@progbits
>  #endif
> +
> +#elif defined(__APPLE__)
> +
> +       .text
> +       .p2align 4,,15
> +       .globl _snapshot
> +_snapshot:
> +.LFB3:
> +       movq    %rax, _rax(%rip)
> +       movq    %rbx, _rbx(%rip)
> +       movq    %rcx, _rcx(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       movq    %rdi, _rdi(%rip)
> +       movq    %rsi, _rsi(%rip)
> +       movq    %rbp, _rbp(%rip)
> +       movq    %rsp, _rsp(%rip)
> +       movq    %r8, _r8(%rip)
> +       movq    %r9, _r9(%rip)
> +       movq    %r10, _r10(%rip)
> +       movq    %r11, _r11(%rip)
> +       movq    %r12, _r12(%rip)
> +       movq    %r13, _r13(%rip)
> +       movq    %r14, _r14(%rip)
> +       movq    %r15, _r15(%rip)
> +       vmovdqu32 %zmm0, _zmm_regs+0(%rip)
> +       vmovdqu32 %zmm1, _zmm_regs+64(%rip)
> +       vmovdqu32 %zmm2, _zmm_regs+128(%rip)
> +       vmovdqu32 %zmm3, _zmm_regs+192(%rip)
> +       vmovdqu32 %zmm4, _zmm_regs+256(%rip)
> +       vmovdqu32 %zmm5, _zmm_regs+320(%rip)
> +       vmovdqu32 %zmm6, _zmm_regs+384(%rip)
> +       vmovdqu32 %zmm7, _zmm_regs+448(%rip)
> +       vmovdqu32 %zmm8, _zmm_regs+512(%rip)
> +       vmovdqu32 %zmm9, _zmm_regs+576(%rip)
> +       vmovdqu32 %zmm10, _zmm_regs+640(%rip)
> +       vmovdqu32 %zmm11, _zmm_regs+704(%rip)
> +       vmovdqu32 %zmm12, _zmm_regs+768(%rip)
> +       vmovdqu32 %zmm13, _zmm_regs+832(%rip)
> +       vmovdqu32 %zmm14, _zmm_regs+896(%rip)
> +       vmovdqu32 %zmm15, _zmm_regs+960(%rip)
> +       vmovdqu32 %zmm16, _zmm_regs+1024(%rip)
> +       vmovdqu32 %zmm17, _zmm_regs+1088(%rip)
> +       vmovdqu32 %zmm18, _zmm_regs+1152(%rip)
> +       vmovdqu32 %zmm19, _zmm_regs+1216(%rip)
> +       vmovdqu32 %zmm20, _zmm_regs+1280(%rip)
> +       vmovdqu32 %zmm21, _zmm_regs+1344(%rip)
> +       vmovdqu32 %zmm22, _zmm_regs+1408(%rip)
> +       vmovdqu32 %zmm23, _zmm_regs+1472(%rip)
> +       vmovdqu32 %zmm24, _zmm_regs+1536(%rip)
> +       vmovdqu32 %zmm25, _zmm_regs+1600(%rip)
> +       vmovdqu32 %zmm26, _zmm_regs+1664(%rip)
> +       vmovdqu32 %zmm27, _zmm_regs+1728(%rip)
> +       vmovdqu32 %zmm28, _zmm_regs+1792(%rip)
> +       vmovdqu32 %zmm29, _zmm_regs+1856(%rip)
> +       vmovdqu32 %zmm30, _zmm_regs+1920(%rip)
> +       vmovdqu32 %zmm31, _zmm_regs+1984(%rip)
> +       jmp     *_callthis(%rip)
> +.LFE3:
> +
> +       .p2align 4,,15
> +       .globl _snapshot_ret
> +_snapshot_ret:
> +       movq    %rdi, _rdi(%rip)
> +       subq    $8, %rsp
> +       call    *_callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, _rax(%rip)
> +       movq    %rdx, _rdx(%rip)
> +       vmovdqu32       %zmm0, _zmm_regs+0(%rip)
> +       vmovdqu32       %zmm1, _zmm_regs+64(%rip)
> +       fstpt   _x87_regs(%rip)
> +       fstpt   _x87_regs+16(%rip)
> +       fldt    _x87_regs+16(%rip)
> +       fldt    _x87_regs(%rip)
> +       ret
> +
> +       .globl  _callthis
> +       .zerofill __DATA,__bss,_callthis,8,3
> +       .globl  _rax
> +       .zerofill __DATA,__bss,_rax,8,3
> +       .globl  _rbx
> +       .zerofill __DATA,__bss,_rbx,8,3
> +       .globl  _rcx
> +       .zerofill __DATA,__bss,_rcx,8,3
> +       .globl  _rdx
> +       .zerofill __DATA,__bss,_rdx,8,3
> +       .globl  _rsi
> +       .zerofill __DATA,__bss,_rsi,8,3
> +       .globl  _rdi
> +       .zerofill __DATA,__bss,_rdi,8,3
> +       .globl  _rsp
> +       .zerofill __DATA,__bss,_rsp,8,3
> +       .globl  _rbp
> +       .zerofill __DATA,__bss,_rbp,8,3
> +       .globl  _r8
> +       .zerofill __DATA,__bss,_r8,8,3
> +       .globl  _r9
> +       .zerofill __DATA,__bss,_r9,8,3
> +       .globl  _r10
> +       .zerofill __DATA,__bss,_r10,8,3
> +       .globl  _r11
> +       .zerofill __DATA,__bss,_r11,8,3
> +       .globl  _r12
> +       .zerofill __DATA,__bss,_r12,8,3
> +       .globl  _r13
> +       .zerofill __DATA,__bss,_r13,8,3
> +       .globl  _r14
> +       .zerofill __DATA,__bss,_r14,8,3
> +       .globl  _r15
> +       .zerofill __DATA,__bss,_r15,8,3
> +       .globl  _zmm_regs
> +       .zerofill __DATA,__bss,_zmm_regs,2048,6
> +       .globl  _x87_regs
> +       .zerofill __DATA,__bss,_x87_regs,128,5
> +       .globl  _volatile_var
> +       .zerofill __DATA,__bss,_volatile_var,8,3
> +
> +#else
> +#error unknown object format
> +#endif
> +
> --
> 2.37.1 (Apple Git-137.1)
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-04 20:20 ` Uros Bizjak
@ 2022-12-04 20:30   ` Iain Sandoe
  2022-12-05 10:37     ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Iain Sandoe @ 2022-12-04 20:30 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches, crazylht



> On 4 Dec 2022, at 20:20, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> 
> On Sun, Dec 4, 2022 at 12:51 PM Iain Sandoe <iains.gcc@gmail.com> wrote:
>> 
>> This is almost a completely Darwin-local patch, but there is one (repeated)
>> place where a general change is needed - which is in making xmm_regs and
>> x87_regs extern in the three copies of args.h (this is consistent with the
>> other saved vars).  These failures represent most of the current testsuite noise
>> on x86 Darwin.
>> 
>> tested on x86-64 Darwin and Linux.
>> 
>> OK for master?
>> Iain
>> 
>> -- >8 --
>> 
>> These tests have failed since introduction since they assume that the
>> assembler output is ELF and that the ABI targeted supports the addressing.
>> 
>> For Darwin, Mach-O and ABI we need to make several changes:
>> 1. Use the __USER_LABEL_PREFIX__
>> 2. Remove the use of ELF-specific constructs (.size, .type etc.)
>> 3. We cannot make direct access to common variables in the ABI, so that we
>>   must move these to BSS.
>> 
>> Since that set is quite significant, I elected to make a separate source
>> section for Darwin.  This is introduced by #elif defined(__APPLE__) because
>> __MACH__ is also used by HURD.
>> 
>> There are potentially other X86 targets (e.g. XCOFF) that could have yet
>> more changes, so I added a catchall section that #errors if the object format
>> is neither ELF nor Mach-O.
>> 
>> Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>        * gcc.target/x86_64/abi/bf16/args.h:
>>        * gcc.target/x86_64/abi/bf16/asm-support.S:
>>        * gcc.target/x86_64/abi/bf16/m256bf16/args.h:
>>        * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S:
>>        * gcc.target/x86_64/abi/bf16/m512bf16/args.h:
>>        * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S:
> 
> Missing descriptions in ChangeLog entry.

oops, here:

gcc/testsuite/ChangeLog:

	* gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
	* gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
	* gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
	* gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
	* gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
	* gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.

Iain.

> 

> Uros.
> 
>> ---
>> .../gcc.target/x86_64/abi/bf16/args.h         |   4 +-
>> .../gcc.target/x86_64/abi/bf16/asm-support.S  | 105 +++++++++++++++
>> .../x86_64/abi/bf16/m256bf16/args.h           |   4 +-
>> .../x86_64/abi/bf16/m256bf16/asm-support.S    | 107 +++++++++++++++
>> .../x86_64/abi/bf16/m512bf16/args.h           |   4 +-
>> .../x86_64/abi/bf16/m512bf16/asm-support.S    | 123 ++++++++++++++++++
>> 6 files changed, 341 insertions(+), 6 deletions(-)
>> 
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
>> index 11d7e2b3a1c..95f9a394f2c 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
>> @@ -42,8 +42,8 @@ typedef union {
>> } X87_T;
>> extern void (*callthis)(void);
>> extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
>> -XMM_T xmm_regs[16];
>> -X87_T x87_regs[8];
>> +extern XMM_T xmm_regs[16];
>> +extern X87_T x87_regs[8];
>> extern volatile unsigned long long volatile_var;
>> extern void snapshot (void);
>> extern void snapshot_ret (void);
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
>> index 7559aa910c4..331bf92d761 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
>> @@ -1,3 +1,5 @@
>> +
>> +#ifdef __ELF__
>>        .text
>>        .p2align 4,,15
>> .globl snapshot
>> @@ -82,3 +84,106 @@ snapshot_ret:
>> #ifdef __linux__
>>        .section        .note.GNU-stack,"",@progbits
>> #endif
>> +
>> +#elif defined(__APPLE__)
>> +       .text
>> +       .p2align 4,,15
>> +       .globl _snapshot
>> +_snapshot:
>> +LFB3:
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rbx, _rbx(%rip)
>> +       movq    %rcx, _rcx(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       movq    %rdi, _rdi(%rip)
>> +       movq    %rsi, _rsi(%rip)
>> +       movq    %rbp, _rbp(%rip)
>> +       movq    %rsp, _rsp(%rip)
>> +       movq    %r8, _r8(%rip)
>> +       movq    %r9, _r9(%rip)
>> +       movq    %r10, _r10(%rip)
>> +       movq    %r11, _r11(%rip)
>> +       movq    %r12, _r12(%rip)
>> +       movq    %r13, _r13(%rip)
>> +       movq    %r14, _r14(%rip)
>> +       movq    %r15, _r15(%rip)
>> +       movdqu  %xmm0, _xmm_regs+0(%rip)
>> +       movdqu  %xmm1, _xmm_regs+16(%rip)
>> +       movdqu  %xmm2, _xmm_regs+32(%rip)
>> +       movdqu  %xmm3, _xmm_regs+48(%rip)
>> +       movdqu  %xmm4, _xmm_regs+64(%rip)
>> +       movdqu  %xmm5, _xmm_regs+80(%rip)
>> +       movdqu  %xmm6, _xmm_regs+96(%rip)
>> +       movdqu  %xmm7, _xmm_regs+112(%rip)
>> +       movdqu  %xmm8, _xmm_regs+128(%rip)
>> +       movdqu  %xmm9, _xmm_regs+144(%rip)
>> +       movdqu  %xmm10, _xmm_regs+160(%rip)
>> +       movdqu  %xmm11, _xmm_regs+176(%rip)
>> +       movdqu  %xmm12, _xmm_regs+192(%rip)
>> +       movdqu  %xmm13, _xmm_regs+208(%rip)
>> +       movdqu  %xmm14, _xmm_regs+224(%rip)
>> +       movdqu  %xmm15, _xmm_regs+240(%rip)
>> +       jmp     *_callthis(%rip)
>> +LFE3:
>> +
>> +       .p2align 4,,15
>> +       .globl _snapshot_ret
>> +_snapshot_ret:
>> +       movq    %rdi, _rdi(%rip)
>> +       subq    $8, %rsp
>> +       call    *_callthis(%rip)
>> +       addq    $8, %rsp
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       movdqu  %xmm0, _xmm_regs+0(%rip)
>> +       movdqu  %xmm1, _xmm_regs+16(%rip)
>> +       fstpt   _x87_regs(%rip)
>> +       fstpt   _x87_regs+16(%rip)
>> +       fldt    _x87_regs+16(%rip)
>> +       fldt    _x87_regs(%rip)
>> +       ret
>> +
>> +       .globl  _callthis
>> +       .zerofill __DATA,__bss,_callthis,8,3
>> +       .globl  _rax
>> +       .zerofill __DATA,__bss,_rax,8,3
>> +       .globl  _rbx
>> +       .zerofill __DATA,__bss,_rbx,8,3
>> +       .globl  _rcx
>> +       .zerofill __DATA,__bss,_rcx,8,3
>> +       .globl  _rdx
>> +       .zerofill __DATA,__bss,_rdx,8,3
>> +       .globl  _rsi
>> +       .zerofill __DATA,__bss,_rsi,8,3
>> +       .globl  _rdi
>> +       .zerofill __DATA,__bss,_rdi,8,3
>> +       .globl  _rsp
>> +       .zerofill __DATA,__bss,_rsp,8,3
>> +       .globl  _rbp
>> +       .zerofill __DATA,__bss,_rbp,8,3
>> +       .globl  _r8
>> +       .zerofill __DATA,__bss,_r8,8,3
>> +       .globl  _r9
>> +       .zerofill __DATA,__bss,_r9,8,3
>> +       .globl  _r10
>> +       .zerofill __DATA,__bss,_r10,8,3
>> +       .globl  _r11
>> +       .zerofill __DATA,__bss,_r11,8,3
>> +       .globl  _r12
>> +       .zerofill __DATA,__bss,_r12,8,3
>> +       .globl  _r13
>> +       .zerofill __DATA,__bss,_r13,8,3
>> +       .globl  _r14
>> +       .zerofill __DATA,__bss,_r14,8,3
>> +       .globl  _r15
>> +       .zerofill __DATA,__bss,_r15,8,3
>> +       .globl  _xmm_regs
>> +       .zerofill __DATA,__bss,_xmm_regs,256,5
>> +       .globl  _x87_regs
>> +       .zerofill __DATA,__bss,_x87_regs,128,5
>> +       .globl  _volatile_var
>> +       .zerofill __DATA,__bss,_volatile_var,8,3
>> +
>> +#else
>> +#error unknown object format
>> +#endif
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
>> index 94627ffbd44..1027742cbb2 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
>> @@ -50,8 +50,8 @@ typedef union {
>> } X87_T;
>> extern void (*callthis)(void);
>> extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
>> -YMM_T ymm_regs[16];
>> -X87_T x87_regs[8];
>> +extern YMM_T ymm_regs[16];
>> +extern X87_T x87_regs[8];
>> extern volatile unsigned long long volatile_var;
>> extern void snapshot (void);
>> extern void snapshot_ret (void);
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
>> index 24c8b3c9023..171654aa4db 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
>> @@ -1,3 +1,5 @@
>> +
>> +#ifdef __ELF__
>>        .text
>>        .p2align 4,,15
>> .globl snapshot
>> @@ -82,3 +84,108 @@ snapshot_ret:
>> #ifdef __linux__
>>        .section        .note.GNU-stack,"",@progbits
>> #endif
>> +
>> +#elif defined(__APPLE__)
>> +
>> +       .text
>> +       .p2align 4,,15
>> +       .globl _snapshot
>> +_snapshot:
>> +.LFB3:
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rbx, _rbx(%rip)
>> +       movq    %rcx, _rcx(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       movq    %rdi, _rdi(%rip)
>> +       movq    %rsi, _rsi(%rip)
>> +       movq    %rbp, _rbp(%rip)
>> +       movq    %rsp, _rsp(%rip)
>> +       movq    %r8, _r8(%rip)
>> +       movq    %r9, _r9(%rip)
>> +       movq    %r10, _r10(%rip)
>> +       movq    %r11, _r11(%rip)
>> +       movq    %r12, _r12(%rip)
>> +       movq    %r13, _r13(%rip)
>> +       movq    %r14, _r14(%rip)
>> +       movq    %r15, _r15(%rip)
>> +       vmovdqu %ymm0, _ymm_regs+0(%rip)
>> +       vmovdqu %ymm1, _ymm_regs+32(%rip)
>> +       vmovdqu %ymm2, _ymm_regs+64(%rip)
>> +       vmovdqu %ymm3, _ymm_regs+96(%rip)
>> +       vmovdqu %ymm4, _ymm_regs+128(%rip)
>> +       vmovdqu %ymm5, _ymm_regs+160(%rip)
>> +       vmovdqu %ymm6, _ymm_regs+192(%rip)
>> +       vmovdqu %ymm7, _ymm_regs+224(%rip)
>> +       vmovdqu %ymm8, _ymm_regs+256(%rip)
>> +       vmovdqu %ymm9, _ymm_regs+288(%rip)
>> +       vmovdqu %ymm10, _ymm_regs+320(%rip)
>> +       vmovdqu %ymm11, _ymm_regs+352(%rip)
>> +       vmovdqu %ymm12, _ymm_regs+384(%rip)
>> +       vmovdqu %ymm13, _ymm_regs+416(%rip)
>> +       vmovdqu %ymm14, _ymm_regs+448(%rip)
>> +       vmovdqu %ymm15, _ymm_regs+480(%rip)
>> +       jmp     *_callthis(%rip)
>> +.LFE3:
>> +
>> +       .p2align 4,,15
>> +       .globl _snapshot_ret
>> +_snapshot_ret:
>> +       movq    %rdi, _rdi(%rip)
>> +       subq    $8, %rsp
>> +       call    *_callthis(%rip)
>> +       addq    $8, %rsp
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       vmovdqu %ymm0, _ymm_regs+0(%rip)
>> +       vmovdqu %ymm1, _ymm_regs+32(%rip)
>> +       fstpt   _x87_regs(%rip)
>> +       fstpt   _x87_regs+16(%rip)
>> +       fldt    _x87_regs+16(%rip)
>> +       fldt    _x87_regs(%rip)
>> +       ret
>> +
>> +       .globl  _callthis
>> +       .zerofill __DATA,__bss,_callthis,8,3
>> +       .globl  _rax
>> +       .zerofill __DATA,__bss,_rax,8,3
>> +       .globl  _rbx
>> +       .zerofill __DATA,__bss,_rbx,8,3
>> +       .globl  _rcx
>> +       .zerofill __DATA,__bss,_rcx,8,3
>> +       .globl  _rdx
>> +       .zerofill __DATA,__bss,_rdx,8,3
>> +       .globl  _rsi
>> +       .zerofill __DATA,__bss,_rsi,8,3
>> +       .globl  _rdi
>> +       .zerofill __DATA,__bss,_rdi,8,3
>> +       .globl  _rsp
>> +       .zerofill __DATA,__bss,_rsp,8,3
>> +       .globl  _rbp
>> +       .zerofill __DATA,__bss,_rbp,8,3
>> +       .globl  _r8
>> +       .zerofill __DATA,__bss,_r8,8,3
>> +       .globl  _r9
>> +       .zerofill __DATA,__bss,_r9,8,3
>> +       .globl  _r10
>> +       .zerofill __DATA,__bss,_r10,8,3
>> +       .globl  _r11
>> +       .zerofill __DATA,__bss,_r11,8,3
>> +       .globl  _r12
>> +       .zerofill __DATA,__bss,_r12,8,3
>> +       .globl  _r13
>> +       .zerofill __DATA,__bss,_r13,8,3
>> +       .globl  _r14
>> +       .zerofill __DATA,__bss,_r14,8,3
>> +       .globl  _r15
>> +       .zerofill __DATA,__bss,_r15,8,3
>> +       .globl  _ymm_regs
>> +       .zerofill __DATA,__bss,_ymm_regs,512,5
>> +       .globl  _x87_regs
>> +       .zerofill __DATA,__bss,_x87_regs,128,5
>> +       .globl  _volatile_var
>> +       .zerofill __DATA,__bss,_volatile_var,8,3
>> +
>> +#else
>> +#error unknown object format
>> +#endif
>> +
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
>> index 64b24783833..f9710bae347 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
>> @@ -51,8 +51,8 @@ typedef union {
>> } X87_T;
>> extern void (*callthis)(void);
>> extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
>> -ZMM_T zmm_regs[32];
>> -X87_T x87_regs[8];
>> +extern ZMM_T zmm_regs[32];
>> +extern X87_T x87_regs[8];
>> extern volatile unsigned long long volatile_var;
>> extern void snapshot (void);
>> extern void snapshot_ret (void);
>> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
>> index 86d54d11c58..9dc6d173a61 100644
>> --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
>> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
>> @@ -1,3 +1,5 @@
>> +
>> +#ifdef __ELF__
>>        .text
>>        .p2align 4,,15
>> .globl snapshot
>> @@ -98,3 +100,124 @@ snapshot_ret:
>> #ifdef __linux__
>>        .section        .note.GNU-stack,"",@progbits
>> #endif
>> +
>> +#elif defined(__APPLE__)
>> +
>> +       .text
>> +       .p2align 4,,15
>> +       .globl _snapshot
>> +_snapshot:
>> +.LFB3:
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rbx, _rbx(%rip)
>> +       movq    %rcx, _rcx(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       movq    %rdi, _rdi(%rip)
>> +       movq    %rsi, _rsi(%rip)
>> +       movq    %rbp, _rbp(%rip)
>> +       movq    %rsp, _rsp(%rip)
>> +       movq    %r8, _r8(%rip)
>> +       movq    %r9, _r9(%rip)
>> +       movq    %r10, _r10(%rip)
>> +       movq    %r11, _r11(%rip)
>> +       movq    %r12, _r12(%rip)
>> +       movq    %r13, _r13(%rip)
>> +       movq    %r14, _r14(%rip)
>> +       movq    %r15, _r15(%rip)
>> +       vmovdqu32 %zmm0, _zmm_regs+0(%rip)
>> +       vmovdqu32 %zmm1, _zmm_regs+64(%rip)
>> +       vmovdqu32 %zmm2, _zmm_regs+128(%rip)
>> +       vmovdqu32 %zmm3, _zmm_regs+192(%rip)
>> +       vmovdqu32 %zmm4, _zmm_regs+256(%rip)
>> +       vmovdqu32 %zmm5, _zmm_regs+320(%rip)
>> +       vmovdqu32 %zmm6, _zmm_regs+384(%rip)
>> +       vmovdqu32 %zmm7, _zmm_regs+448(%rip)
>> +       vmovdqu32 %zmm8, _zmm_regs+512(%rip)
>> +       vmovdqu32 %zmm9, _zmm_regs+576(%rip)
>> +       vmovdqu32 %zmm10, _zmm_regs+640(%rip)
>> +       vmovdqu32 %zmm11, _zmm_regs+704(%rip)
>> +       vmovdqu32 %zmm12, _zmm_regs+768(%rip)
>> +       vmovdqu32 %zmm13, _zmm_regs+832(%rip)
>> +       vmovdqu32 %zmm14, _zmm_regs+896(%rip)
>> +       vmovdqu32 %zmm15, _zmm_regs+960(%rip)
>> +       vmovdqu32 %zmm16, _zmm_regs+1024(%rip)
>> +       vmovdqu32 %zmm17, _zmm_regs+1088(%rip)
>> +       vmovdqu32 %zmm18, _zmm_regs+1152(%rip)
>> +       vmovdqu32 %zmm19, _zmm_regs+1216(%rip)
>> +       vmovdqu32 %zmm20, _zmm_regs+1280(%rip)
>> +       vmovdqu32 %zmm21, _zmm_regs+1344(%rip)
>> +       vmovdqu32 %zmm22, _zmm_regs+1408(%rip)
>> +       vmovdqu32 %zmm23, _zmm_regs+1472(%rip)
>> +       vmovdqu32 %zmm24, _zmm_regs+1536(%rip)
>> +       vmovdqu32 %zmm25, _zmm_regs+1600(%rip)
>> +       vmovdqu32 %zmm26, _zmm_regs+1664(%rip)
>> +       vmovdqu32 %zmm27, _zmm_regs+1728(%rip)
>> +       vmovdqu32 %zmm28, _zmm_regs+1792(%rip)
>> +       vmovdqu32 %zmm29, _zmm_regs+1856(%rip)
>> +       vmovdqu32 %zmm30, _zmm_regs+1920(%rip)
>> +       vmovdqu32 %zmm31, _zmm_regs+1984(%rip)
>> +       jmp     *_callthis(%rip)
>> +.LFE3:
>> +
>> +       .p2align 4,,15
>> +       .globl _snapshot_ret
>> +_snapshot_ret:
>> +       movq    %rdi, _rdi(%rip)
>> +       subq    $8, %rsp
>> +       call    *_callthis(%rip)
>> +       addq    $8, %rsp
>> +       movq    %rax, _rax(%rip)
>> +       movq    %rdx, _rdx(%rip)
>> +       vmovdqu32       %zmm0, _zmm_regs+0(%rip)
>> +       vmovdqu32       %zmm1, _zmm_regs+64(%rip)
>> +       fstpt   _x87_regs(%rip)
>> +       fstpt   _x87_regs+16(%rip)
>> +       fldt    _x87_regs+16(%rip)
>> +       fldt    _x87_regs(%rip)
>> +       ret
>> +
>> +       .globl  _callthis
>> +       .zerofill __DATA,__bss,_callthis,8,3
>> +       .globl  _rax
>> +       .zerofill __DATA,__bss,_rax,8,3
>> +       .globl  _rbx
>> +       .zerofill __DATA,__bss,_rbx,8,3
>> +       .globl  _rcx
>> +       .zerofill __DATA,__bss,_rcx,8,3
>> +       .globl  _rdx
>> +       .zerofill __DATA,__bss,_rdx,8,3
>> +       .globl  _rsi
>> +       .zerofill __DATA,__bss,_rsi,8,3
>> +       .globl  _rdi
>> +       .zerofill __DATA,__bss,_rdi,8,3
>> +       .globl  _rsp
>> +       .zerofill __DATA,__bss,_rsp,8,3
>> +       .globl  _rbp
>> +       .zerofill __DATA,__bss,_rbp,8,3
>> +       .globl  _r8
>> +       .zerofill __DATA,__bss,_r8,8,3
>> +       .globl  _r9
>> +       .zerofill __DATA,__bss,_r9,8,3
>> +       .globl  _r10
>> +       .zerofill __DATA,__bss,_r10,8,3
>> +       .globl  _r11
>> +       .zerofill __DATA,__bss,_r11,8,3
>> +       .globl  _r12
>> +       .zerofill __DATA,__bss,_r12,8,3
>> +       .globl  _r13
>> +       .zerofill __DATA,__bss,_r13,8,3
>> +       .globl  _r14
>> +       .zerofill __DATA,__bss,_r14,8,3
>> +       .globl  _r15
>> +       .zerofill __DATA,__bss,_r15,8,3
>> +       .globl  _zmm_regs
>> +       .zerofill __DATA,__bss,_zmm_regs,2048,6
>> +       .globl  _x87_regs
>> +       .zerofill __DATA,__bss,_x87_regs,128,5
>> +       .globl  _volatile_var
>> +       .zerofill __DATA,__bss,_volatile_var,8,3
>> +
>> +#else
>> +#error unknown object format
>> +#endif
>> +
>> --
>> 2.37.1 (Apple Git-137.1)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-04 20:30   ` Iain Sandoe
@ 2022-12-05 10:37     ` Uros Bizjak
  2022-12-05 14:54       ` Iain Sandoe
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2022-12-05 10:37 UTC (permalink / raw)
  To: Iain Sandoe; +Cc: GCC Patches, crazylht

On Sun, Dec 4, 2022 at 9:30 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>
>
>
> > On 4 Dec 2022, at 20:20, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sun, Dec 4, 2022 at 12:51 PM Iain Sandoe <iains.gcc@gmail.com> wrote:
> >>
> >> This is almost a completely Darwin-local patch, but there is one (repeated)
> >> place where a general change is needed - which is in making xmm_regs and
> >> x87_regs extern in the three copies of args.h (this is consistent with the
> >> other saved vars).  These failures represent most of the current testsuite noise
> >> on x86 Darwin.
> >>
> >> tested on x86-64 Darwin and Linux.
> >>
> >> OK for master?
> >> Iain
> >>
> >> -- >8 --
> >>
> >> These tests have failed since introduction since they assume that the
> >> assembler output is ELF and that the ABI targeted supports the addressing.
> >>
> >> For Darwin, Mach-O and ABI we need to make several changes:
> >> 1. Use the __USER_LABEL_PREFIX__
> >> 2. Remove the use of ELF-specific constructs (.size, .type etc.)
> >> 3. We cannot make direct access to common variables in the ABI, so that we
> >>   must move these to BSS.
> >>
> >> Since that set is quite significant, I elected to make a separate source
> >> section for Darwin.  This is introduced by #elif defined(__APPLE__) because
> >> __MACH__ is also used by HURD.
> >>
> >> There are potentially other X86 targets (e.g. XCOFF) that could have yet
> >> more changes, so I added a catchall section that #errors if the object format
> >> is neither ELF nor Mach-O.
> >>
> >> Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>        * gcc.target/x86_64/abi/bf16/args.h:
> >>        * gcc.target/x86_64/abi/bf16/asm-support.S:
> >>        * gcc.target/x86_64/abi/bf16/m256bf16/args.h:
> >>        * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S:
> >>        * gcc.target/x86_64/abi/bf16/m512bf16/args.h:
> >>        * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S:
> >
> > Missing descriptions in ChangeLog entry.
>
> oops, here:
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
>         * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
>         * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
>         * gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
>         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
>         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.

Please note that in other directories asm-support-darwin.s is
introduced and included via .exp file. Is there a reason a different
approach is introduced here?

Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-05 10:37     ` Uros Bizjak
@ 2022-12-05 14:54       ` Iain Sandoe
  2022-12-05 21:07         ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Iain Sandoe @ 2022-12-05 14:54 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches, crazylht

[-- Attachment #1: Type: text/plain, Size: 1220 bytes --]

Hi Uros,

> On 5 Dec 2022, at 10:37, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> 
> On Sun, Dec 4, 2022 at 9:30 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>> 

>> gcc/testsuite/ChangeLog:
>> 
>>        * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
>>        * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
>>        * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
>>        * gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
>>        * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
>>        * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.
> 
> Please note that in other directories asm-support-darwin.s is
> introduced and included via .exp file. Is there a reason a different
> approach is introduced here?

Since it seems that testcases get added and amended without considering any
sub-target apart from x86_64-linux-gnu (even by very experienced contributors),
I was hoping that the Darwin section might prompt folks to remember that there
are several other sub-targets.

However, the main thing is to fix the tests .. so here’s a version using separate
files.

OK?
thanks,
Iain


[-- Attachment #2: 0001-testsuite-X86-Darwin-Fix-bf16-ABI-tests-for-Mach-O-M.patch --]
[-- Type: application/octet-stream, Size: 16645 bytes --]

From b52cc366902919c3bf45600f398968207e55052e Mon Sep 17 00:00:00 2001
From: Iain Sandoe <iain@sandoe.co.uk>
Date: Sat, 3 Dec 2022 20:51:54 +0000
Subject: [PATCH v2] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS
 ABI.

These tests have failed since their introduction because they assume that the
assembler output is ELF and that the targeted ABI supports the addressing used.

For Darwin (Mach-O and its ABI) we need to make several changes:
1. Use the __USER_LABEL_PREFIX__
2. Remove the use of ELF-specific constructs (.size, .type etc.)
3. The ABI does not permit direct access to common variables, so we must
   move these to BSS.

These changes are made in darwin-specific asm files.

Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>

gcc/testsuite/ChangeLog:

	* gcc.target/x86_64/abi/bf16/abi-bf16.exp: Use separate asm for Darwin.
	* gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Likewise.
	* gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Likewise.
	* gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
	* gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
	* gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
	* gcc.target/x86_64/abi/bf16/asm-support-darwin.S: New file.
	* gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S: New file.
	* gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S: New file.
---
 .../gcc.target/x86_64/abi/bf16/abi-bf16.exp   |  12 +-
 .../gcc.target/x86_64/abi/bf16/args.h         |   4 +-
 .../x86_64/abi/bf16/asm-support-darwin.S      |  97 +++++++++++++++
 .../x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp |  12 +-
 .../x86_64/abi/bf16/m256bf16/args.h           |   4 +-
 .../abi/bf16/m256bf16/asm-support-darwin.S    |  97 +++++++++++++++
 .../x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp |  12 +-
 .../x86_64/abi/bf16/m512bf16/args.h           |   4 +-
 .../abi/bf16/m512bf16/asm-support-darwin.S    | 113 ++++++++++++++++++
 9 files changed, 340 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
index bd386f2a560..8edab855dd0 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
@@ -36,9 +36,15 @@ set additional_flags "-W -Wall -msse2"
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
     if {[runtest_file_p $runtests $src]} {
-        c-torture-execute [list $src \
-                                $srcdir/$subdir/asm-support.S] \
-                                $additional_flags
+	if { ([istarget *-*-darwin*]) } then {
+	    c-torture-execute [list $src \
+				    $srcdir/$subdir/asm-support-darwin.S] \
+				    $additional_flags
+	} else {
+            c-torture-execute [list $src \
+                                    $srcdir/$subdir/asm-support.S] \
+                                    $additional_flags
+        }
     }
 }
 
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
index 11d7e2b3a1c..95f9a394f2c 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
@@ -42,8 +42,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-XMM_T xmm_regs[16];
-X87_T x87_regs[8];
+extern XMM_T xmm_regs[16];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S
new file mode 100644
index 00000000000..bdaa02fe187
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S
@@ -0,0 +1,97 @@
+	.text
+	.p2align 4,,15
+	.globl _snapshot
+_snapshot:
+LFB3:
+	movq	%rax, _rax(%rip)
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	movdqu	%xmm0, _xmm_regs+0(%rip)
+	movdqu	%xmm1, _xmm_regs+16(%rip)
+	movdqu	%xmm2, _xmm_regs+32(%rip)
+	movdqu	%xmm3, _xmm_regs+48(%rip)
+	movdqu	%xmm4, _xmm_regs+64(%rip)
+	movdqu	%xmm5, _xmm_regs+80(%rip)
+	movdqu	%xmm6, _xmm_regs+96(%rip)
+	movdqu	%xmm7, _xmm_regs+112(%rip)
+	movdqu	%xmm8, _xmm_regs+128(%rip)
+	movdqu	%xmm9, _xmm_regs+144(%rip)
+	movdqu	%xmm10, _xmm_regs+160(%rip)
+	movdqu	%xmm11, _xmm_regs+176(%rip)
+	movdqu	%xmm12, _xmm_regs+192(%rip)
+	movdqu	%xmm13, _xmm_regs+208(%rip)
+	movdqu	%xmm14, _xmm_regs+224(%rip)
+	movdqu	%xmm15, _xmm_regs+240(%rip)
+	jmp	*_callthis(%rip)
+LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:
+	movq	%rdi, _rdi(%rip)
+	subq	$8, %rsp
+	call	*_callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	movdqu	%xmm0, _xmm_regs+0(%rip)
+	movdqu	%xmm1, _xmm_regs+16(%rip)
+	fstpt	_x87_regs(%rip)
+	fstpt	_x87_regs+16(%rip)
+	fldt	_x87_regs+16(%rip)
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis
+	.zerofill __DATA,__bss,_callthis,8,3
+	.globl	_rax
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_xmm_regs
+	.zerofill __DATA,__bss,_xmm_regs,256,5
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
index 309db8ff12e..02b45052b32 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
@@ -36,9 +36,15 @@ set additional_flags "-W -Wall -mavx2"
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
     if {[runtest_file_p $runtests $src]} {
-        c-torture-execute [list $src \
-                                $srcdir/$subdir/asm-support.S] \
-                                $additional_flags
+	if { ([istarget *-*-darwin*]) } then {
+	    c-torture-execute [list $src \
+				    $srcdir/$subdir/asm-support-darwin.S] \
+				    $additional_flags
+	} else {
+            c-torture-execute [list $src \
+                                    $srcdir/$subdir/asm-support.S] \
+                                    $additional_flags
+        }
     }
 }
 
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
index 94627ffbd44..1027742cbb2 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
@@ -50,8 +50,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-YMM_T ymm_regs[16];
-X87_T x87_regs[8];
+extern YMM_T ymm_regs[16];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S
new file mode 100644
index 00000000000..e136b574f6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S
@@ -0,0 +1,97 @@
+	.text
+	.p2align 4,,15
+	.globl _snapshot	# leading underscore is the Darwin user-label prefix for the C symbol snapshot
+_snapshot:	# Record all 16 GPRs and ymm0-ymm15 in the like-named globals, then jump through callthis.
+.LFB3:
+	movq	%rax, _rax(%rip)	# each GPR is stored %rip-relative into its own 8-byte global
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)	# rsp has not been modified, so this is the value on entry
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	vmovdqu	%ymm0, _ymm_regs+0(%rip)	# ymmN goes to _ymm_regs + 32*N; unaligned store form is used
+	vmovdqu	%ymm1, _ymm_regs+32(%rip)
+	vmovdqu	%ymm2, _ymm_regs+64(%rip)
+	vmovdqu	%ymm3, _ymm_regs+96(%rip)
+	vmovdqu	%ymm4, _ymm_regs+128(%rip)
+	vmovdqu	%ymm5, _ymm_regs+160(%rip)
+	vmovdqu	%ymm6, _ymm_regs+192(%rip)
+	vmovdqu	%ymm7, _ymm_regs+224(%rip)
+	vmovdqu	%ymm8, _ymm_regs+256(%rip)
+	vmovdqu	%ymm9, _ymm_regs+288(%rip)
+	vmovdqu	%ymm10, _ymm_regs+320(%rip)
+	vmovdqu	%ymm11, _ymm_regs+352(%rip)
+	vmovdqu	%ymm12, _ymm_regs+384(%rip)
+	vmovdqu	%ymm13, _ymm_regs+416(%rip)
+	vmovdqu	%ymm14, _ymm_regs+448(%rip)
+	vmovdqu	%ymm15, _ymm_regs+480(%rip)
+	jmp	*_callthis(%rip)	# indirect jump (not a call) through the callthis pointer; the stack is left as on entry
+.LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:	# Call through callthis, then record the registers that can carry its return value.
+	movq	%rdi, _rdi(%rip)	# record the incoming rdi before the call can change it
+	subq	$8, %rsp	# 8 bytes of padding; assumes SysV entry state (rsp % 16 == 8) so the call site is 16-byte aligned
+	call	*_callthis(%rip)
+	addq	$8, %rsp	# drop the padding again
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	vmovdqu	%ymm0, _ymm_regs+0(%rip)
+	vmovdqu	%ymm1, _ymm_regs+32(%rip)
+	fstpt	_x87_regs(%rip)	# pop st(0) into the first 16-byte x87_regs slot
+	fstpt	_x87_regs+16(%rip)	# pop the next x87 value into the second slot
+	fldt	_x87_regs+16(%rip)	# reload in reverse order so the two x87 values end up in their original stack positions
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis	# Exported zero-filled storage for the variables declared extern in args.h.
+	.zerofill __DATA,__bss,_callthis,8,3	# operands: segment, section, symbol, size in bytes, log2 alignment (3 = 8 bytes)
+	.globl	_rax	# defined in __bss rather than as common symbols so the %rip-relative accesses in the code are direct
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_ymm_regs
+	.zerofill __DATA,__bss,_ymm_regs,512,5	# 16 slots of 32 bytes (one per ymm register), 32-byte aligned
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5	# 8 slots of 16 bytes, matching the 16-byte stride used by fstpt/fldt
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
index b6e0fed4cb4..28abb4e876b 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
@@ -36,9 +36,15 @@ set additional_flags "-W -Wall -mavx512f"
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
     if {[runtest_file_p $runtests $src]} {
-        c-torture-execute [list $src \
-                                $srcdir/$subdir/asm-support.S] \
-                                $additional_flags
+	if { ([istarget *-*-darwin*]) } then {
+	    c-torture-execute [list $src \
+				    $srcdir/$subdir/asm-support-darwin.S] \
+				    $additional_flags
+	} else {
+            c-torture-execute [list $src \
+                                    $srcdir/$subdir/asm-support.S] \
+                                    $additional_flags
+        }
     }
 }
 
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
index 64b24783833..f9710bae347 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
@@ -51,8 +51,8 @@ typedef union {
 } X87_T;
 extern void (*callthis)(void);
 extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-ZMM_T zmm_regs[32];
-X87_T x87_regs[8];
+extern ZMM_T zmm_regs[32];
+extern X87_T x87_regs[8];
 extern volatile unsigned long long volatile_var;
 extern void snapshot (void);
 extern void snapshot_ret (void);
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S
new file mode 100644
index 00000000000..71b61b36b4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S
@@ -0,0 +1,113 @@
+	.text
+	.p2align 4,,15
+	.globl _snapshot	# leading underscore is the Darwin user-label prefix for the C symbol snapshot
+_snapshot:	# Record all 16 GPRs and zmm0-zmm31 in the like-named globals, then jump through callthis.
+.LFB3:
+	movq	%rax, _rax(%rip)	# each GPR is stored %rip-relative into its own 8-byte global
+	movq	%rbx, _rbx(%rip)
+	movq	%rcx, _rcx(%rip)
+	movq	%rdx, _rdx(%rip)
+	movq	%rdi, _rdi(%rip)
+	movq	%rsi, _rsi(%rip)
+	movq	%rbp, _rbp(%rip)
+	movq	%rsp, _rsp(%rip)	# rsp has not been modified, so this is the value on entry
+	movq	%r8, _r8(%rip)
+	movq	%r9, _r9(%rip)
+	movq	%r10, _r10(%rip)
+	movq	%r11, _r11(%rip)
+	movq	%r12, _r12(%rip)
+	movq	%r13, _r13(%rip)
+	movq	%r14, _r14(%rip)
+	movq	%r15, _r15(%rip)
+	vmovdqu32 %zmm0, _zmm_regs+0(%rip)	# zmmN goes to _zmm_regs + 64*N; unaligned store form is used
+	vmovdqu32 %zmm1, _zmm_regs+64(%rip)
+	vmovdqu32 %zmm2, _zmm_regs+128(%rip)
+	vmovdqu32 %zmm3, _zmm_regs+192(%rip)
+	vmovdqu32 %zmm4, _zmm_regs+256(%rip)
+	vmovdqu32 %zmm5, _zmm_regs+320(%rip)
+	vmovdqu32 %zmm6, _zmm_regs+384(%rip)
+	vmovdqu32 %zmm7, _zmm_regs+448(%rip)
+	vmovdqu32 %zmm8, _zmm_regs+512(%rip)
+	vmovdqu32 %zmm9, _zmm_regs+576(%rip)
+	vmovdqu32 %zmm10, _zmm_regs+640(%rip)
+	vmovdqu32 %zmm11, _zmm_regs+704(%rip)
+	vmovdqu32 %zmm12, _zmm_regs+768(%rip)
+	vmovdqu32 %zmm13, _zmm_regs+832(%rip)
+	vmovdqu32 %zmm14, _zmm_regs+896(%rip)
+	vmovdqu32 %zmm15, _zmm_regs+960(%rip)
+	vmovdqu32 %zmm16, _zmm_regs+1024(%rip)
+	vmovdqu32 %zmm17, _zmm_regs+1088(%rip)
+	vmovdqu32 %zmm18, _zmm_regs+1152(%rip)
+	vmovdqu32 %zmm19, _zmm_regs+1216(%rip)
+	vmovdqu32 %zmm20, _zmm_regs+1280(%rip)
+	vmovdqu32 %zmm21, _zmm_regs+1344(%rip)
+	vmovdqu32 %zmm22, _zmm_regs+1408(%rip)
+	vmovdqu32 %zmm23, _zmm_regs+1472(%rip)
+	vmovdqu32 %zmm24, _zmm_regs+1536(%rip)
+	vmovdqu32 %zmm25, _zmm_regs+1600(%rip)
+	vmovdqu32 %zmm26, _zmm_regs+1664(%rip)
+	vmovdqu32 %zmm27, _zmm_regs+1728(%rip)
+	vmovdqu32 %zmm28, _zmm_regs+1792(%rip)
+	vmovdqu32 %zmm29, _zmm_regs+1856(%rip)
+	vmovdqu32 %zmm30, _zmm_regs+1920(%rip)
+	vmovdqu32 %zmm31, _zmm_regs+1984(%rip)
+	jmp	*_callthis(%rip)	# indirect jump (not a call) through the callthis pointer; the stack is left as on entry
+.LFE3:
+
+	.p2align 4,,15
+	.globl _snapshot_ret
+_snapshot_ret:	# Call through callthis, then record the registers that can carry its return value.
+	movq	%rdi, _rdi(%rip)	# record the incoming rdi before the call can change it
+	subq	$8, %rsp	# 8 bytes of padding; assumes SysV entry state (rsp % 16 == 8) so the call site is 16-byte aligned
+	call	*_callthis(%rip)
+	addq	$8, %rsp	# drop the padding again
+	movq	%rax, _rax(%rip)
+	movq	%rdx, _rdx(%rip)
+	vmovdqu32	%zmm0, _zmm_regs+0(%rip)
+	vmovdqu32	%zmm1, _zmm_regs+64(%rip)
+	fstpt	_x87_regs(%rip)	# pop st(0) into the first 16-byte x87_regs slot
+	fstpt	_x87_regs+16(%rip)	# pop the next x87 value into the second slot
+	fldt	_x87_regs+16(%rip)	# reload in reverse order so the two x87 values end up in their original stack positions
+	fldt	_x87_regs(%rip)
+	ret
+
+	.globl	_callthis	# Exported zero-filled storage for the variables declared extern in args.h.
+	.zerofill __DATA,__bss,_callthis,8,3	# operands: segment, section, symbol, size in bytes, log2 alignment (3 = 8 bytes)
+	.globl	_rax	# defined in __bss rather than as common symbols so the %rip-relative accesses in the code are direct
+	.zerofill __DATA,__bss,_rax,8,3
+	.globl	_rbx
+	.zerofill __DATA,__bss,_rbx,8,3
+	.globl	_rcx
+	.zerofill __DATA,__bss,_rcx,8,3
+	.globl	_rdx
+	.zerofill __DATA,__bss,_rdx,8,3
+	.globl	_rsi
+	.zerofill __DATA,__bss,_rsi,8,3
+	.globl	_rdi
+	.zerofill __DATA,__bss,_rdi,8,3
+	.globl	_rsp
+	.zerofill __DATA,__bss,_rsp,8,3
+	.globl	_rbp
+	.zerofill __DATA,__bss,_rbp,8,3
+	.globl	_r8
+	.zerofill __DATA,__bss,_r8,8,3
+	.globl	_r9
+	.zerofill __DATA,__bss,_r9,8,3
+	.globl	_r10
+	.zerofill __DATA,__bss,_r10,8,3
+	.globl	_r11
+	.zerofill __DATA,__bss,_r11,8,3
+	.globl	_r12
+	.zerofill __DATA,__bss,_r12,8,3
+	.globl	_r13
+	.zerofill __DATA,__bss,_r13,8,3
+	.globl	_r14
+	.zerofill __DATA,__bss,_r14,8,3
+	.globl	_r15
+	.zerofill __DATA,__bss,_r15,8,3
+	.globl	_zmm_regs
+	.zerofill __DATA,__bss,_zmm_regs,2048,6	# 32 slots of 64 bytes (one per zmm register), 64-byte aligned
+	.globl	_x87_regs
+	.zerofill __DATA,__bss,_x87_regs,128,5	# 8 slots of 16 bytes, matching the 16-byte stride used by fstpt/fldt
+	.globl	_volatile_var
+	.zerofill __DATA,__bss,_volatile_var,8,3
-- 
2.37.1 (Apple Git-137.1)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-05 14:54       ` Iain Sandoe
@ 2022-12-05 21:07         ` Uros Bizjak
  2022-12-05 21:17           ` Iain Sandoe
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2022-12-05 21:07 UTC (permalink / raw)
  To: Iain Sandoe; +Cc: GCC Patches, crazylht

On Mon, Dec 5, 2022 at 3:54 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>
> Hi Uros,
>
> > On 5 Dec 2022, at 10:37, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sun, Dec 4, 2022 at 9:30 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
> >>
>
> >> gcc/testsuite/ChangeLog:
> >>
> >>        * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
> >>        * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
> >>        * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
> >>        * gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
> >>        * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
> >>        * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.
> >
> > Please note that in other directories asm-support-darwin.s is
> > introduced and included via .exp file. Is there a reason a different
> > approach is introduced here?
>
> Since it seems that testcases get added and amended without considering any
> sub-target apart from x86_64-linux-gnu (even by very experienced contributors),
> I was hoping that the Darwin section might prompt folks to remember that there
> are several other sub-targets.
>
> However, the main thing is to fix the tests .. so here’s a version using separate
> files.

 extern void (*callthis)(void);
 extern unsigned long long
rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
-XMM_T xmm_regs[16];
-X87_T x87_regs[8];
+extern XMM_T xmm_regs[16];
+extern X87_T x87_regs[8];

Do you still need this change? Existing test files are compiled without extern.

+    .globl    _callthis
+    .zerofill __DATA,__bss,_callthis,8,3
+    .globl    _rax
+    .zerofill __DATA,__bss,_rax,8,3
+    .globl    _rbx
+    .zerofill __DATA,__bss,_rbx,8,3
+    .globl    _rcx
+    .zerofill __DATA,__bss,_rcx,8,3
+    .globl    _rdx
+    .zerofill __DATA,__bss,_rdx,8,3
...

I wonder if the above approach is better than existing:

    .comm    _callthis,8
    .comm    _rax,8
    .comm    _rbx,8
    .comm    _rcx,8
    .comm    _rdx,8
...

It is strange to have two different approaches for similar tests. If
the new approach is better, we should also change existing asm-support
files.

Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-05 21:07         ` Uros Bizjak
@ 2022-12-05 21:17           ` Iain Sandoe
  2022-12-05 21:22             ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Iain Sandoe @ 2022-12-05 21:17 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches, crazylht

Hi Uros,

> On 5 Dec 2022, at 21:07, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> 
> On Mon, Dec 5, 2022 at 3:54 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>> 
>> Hi Uros,
>> 
>>> On 5 Dec 2022, at 10:37, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>>> 
>>> On Sun, Dec 4, 2022 at 9:30 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>>>> 
>> 
>>>> gcc/testsuite/ChangeLog:
>>>> 
>>>>       * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
>>>>       * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
>>>>       * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
>>>>       * gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
>>>>       * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
>>>>       * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.
>>> 
>>> Please note that in other directories asm-support-darwin.s is
>>> introduced and included via .exp file. Is there a reason a different
>>> approach is introduced here?
>> 
>> Since it seems that testcases get added and amended without considering any
>> sub-target apart from x86_64-linux-gnu (even by very experienced contributors),
>> I was hoping that the Darwin section might prompt folks to remember that there
>> are several other sub-targets.
>> 
>> However, the main thing is to fix the tests .. so here’s a version using separate
>> files.
> 
> extern void (*callthis)(void);
> extern unsigned long long
> rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> -XMM_T xmm_regs[16];
> -X87_T x87_regs[8];
> +extern XMM_T xmm_regs[16];
> +extern X87_T x87_regs[8];
> 
> Do you still need this change? Existing test files are compiled without extern.
> 
> +    .globl    _callthis
> +    .zerofill __DATA,__bss,_callthis,8,3
> +    .globl    _rax
> +    .zerofill __DATA,__bss,_rax,8,3
> +    .globl    _rbx
> +    .zerofill __DATA,__bss,_rbx,8,3
> +    .globl    _rcx
> +    .zerofill __DATA,__bss,_rcx,8,3
> +    .globl    _rdx
> +    .zerofill __DATA,__bss,_rdx,8,3
> ...
> 
> I wonder if the above approach is better than existing:
> 
>    .comm    _callthis,8
>    .comm    _rax,8
>    .comm    _rbx,8
>    .comm    _rcx,8
>    .comm    _rdx,8
> ...

As noted in the changelog, direct access to common data is not permitted in the Darwin
ABI [for x86_64, it would need to be _xxx@GOTPCREL(%rip)..] that’s why these have
been moved to bss.

> It is strange to have two different approaches for similar tests. If
> the new approach is better, we should also change existing asm-support
> files.

Could be; I have not checked the other cases so far (extremely limited time at the moment).

Quite likely, the accesses work in the testcases, despite violating the ABI.

Iain

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI.
  2022-12-05 21:17           ` Iain Sandoe
@ 2022-12-05 21:22             ` Uros Bizjak
  0 siblings, 0 replies; 8+ messages in thread
From: Uros Bizjak @ 2022-12-05 21:22 UTC (permalink / raw)
  To: Iain Sandoe; +Cc: GCC Patches, crazylht

On Mon, Dec 5, 2022 at 10:17 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
>
> Hi Uros,
>
> > On 5 Dec 2022, at 21:07, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Mon, Dec 5, 2022 at 3:54 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
> >>
> >> Hi Uros,
> >>
> >>> On 5 Dec 2022, at 10:37, Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >>>
> >>> On Sun, Dec 4, 2022 at 9:30 PM Iain Sandoe <iain@sandoe.co.uk> wrote:
> >>>>
> >>
> >>>> gcc/testsuite/ChangeLog:
> >>>>
> >>>>       * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern.
> >>>>       * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise.
> >>>>       * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise.
> >>>>       * gcc.target/x86_64/abi/bf16/asm-support.S: Add Mach-O variant.
> >>>>       * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Likewise.
> >>>>       * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Likewise.
> >>>
> >>> Please note that in other directories asm-support-darwin.s is
> >>> introduced and included via .exp file. Is there a reason a different
> >>> approach is introduced here?
> >>
> >> Since it seems that testcases get added and amended without considering any
> >> sub-target apart from x86_64-linux-gnu (even by very experienced contributors),
> >> I was hoping that the Darwin section might prompt folks to remember that there
> >> are several other sub-targets.
> >>
> >> However, the main thing is to fix the tests .. so here’s a version using separate
> >> files.
> >
> > extern void (*callthis)(void);
> > extern unsigned long long
> > rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> > -XMM_T xmm_regs[16];
> > -X87_T x87_regs[8];
> > +extern XMM_T xmm_regs[16];
> > +extern X87_T x87_regs[8];
> >
> > Do you still need this change? Existing test files are compiled without extern.
> >
> > +    .globl    _callthis
> > +    .zerofill __DATA,__bss,_callthis,8,3
> > +    .globl    _rax
> > +    .zerofill __DATA,__bss,_rax,8,3
> > +    .globl    _rbx
> > +    .zerofill __DATA,__bss,_rbx,8,3
> > +    .globl    _rcx
> > +    .zerofill __DATA,__bss,_rcx,8,3
> > +    .globl    _rdx
> > +    .zerofill __DATA,__bss,_rdx,8,3
> > ...
> >
> > I wonder if the above approach is better than existing:
> >
> >    .comm    _callthis,8
> >    .comm    _rax,8
> >    .comm    _rbx,8
> >    .comm    _rcx,8
> >    .comm    _rdx,8
> > ...
>
> As noted in the changelog, direct access to common data is not permitted in the Darwin
> ABI [for x86_64, it would need to be _xxx@GOTPCREL(%rip)..] that’s why these have
> been moved to bss.

Thanks for the explanation!

The patch is OK.

> > It is strange to have two different approaches for similar tests. If
> > the new approach is better, we should also change existing asm-support
> > files.
>
> could be, I have not checked other case so far (extremely limited time at the moment)
>
> Quite likely, the accesses work in the testcases, despite violating the ABI.

This is never a good sign, it will break sooner or later...

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-12-05 21:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-04 11:51 [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/macOS ABI Iain Sandoe
2022-12-04 20:20 ` Uros Bizjak
2022-12-04 20:30   ` Iain Sandoe
2022-12-05 10:37     ` Uros Bizjak
2022-12-05 14:54       ` Iain Sandoe
2022-12-05 21:07         ` Uros Bizjak
2022-12-05 21:17           ` Iain Sandoe
2022-12-05 21:22             ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).