From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1062) id CA4923858C1F; Thu, 25 May 2023 07:44:45 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org CA4923858C1F Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Alan Modra To: bfd-cvs@sourceware.org Subject: [binutils-gdb] PR29189, dlltool delaylibs corrupt float/double arguments X-Act-Checkin: binutils-gdb X-Git-Author: Alan Modra X-Git-Refname: refs/heads/master X-Git-Oldrev: 7ea5b1997a573920f2cbc8b305276d172dc18277 X-Git-Newrev: 7529ff1fcdbe260a0ac84ee8f33f4fa4ee1ac455 Message-Id: <20230525074445.CA4923858C1F@sourceware.org> Date: Thu, 25 May 2023 07:44:45 +0000 (GMT) X-BeenThere: binutils-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Binutils-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 25 May 2023 07:44:45 -0000 https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=7529ff1fcdbe260a0ac84ee8f33f4fa4ee1ac455 commit 7529ff1fcdbe260a0ac84ee8f33f4fa4ee1ac455 Author: Alan Modra Date: Mon May 15 10:44:29 2023 +0930 PR29189, dlltool delaylibs corrupt float/double arguments PR 29189 * dlltool.c (i386_x64_trampoline): Save and restore xmm0-5. Make use of parameter save area for integer arg regs. Comment. Diff: --- binutils/dlltool.c | 52 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/binutils/dlltool.c b/binutils/dlltool.c index 31c864d7d5c..142d43aff81 100644 --- a/binutils/dlltool.c +++ b/binutils/dlltool.c @@ -583,22 +583,48 @@ static const char i386_trampoline[] = "\tpopl %%ecx\n" "\tjmp *%%eax\n"; +/* Save integer arg regs in parameter space reserved by our caller + above the return address. Allocate space for six fp arg regs plus + parameter space possibly used by __delayLoadHelper2 plus alignment.
+ We enter with the stack offset from 16-byte alignment by the return + address, so allocate 96 + 32 + 8 = 136 bytes. Note that only the + first four xmm regs are used to pass fp args, but the first six + vector ymm (zmm too?) are used to pass vector args. We are + assuming that volatile vector regs are not modified inside + __delayLoadHelper2. However, it is known that at least xmm0 and + xmm1 are trashed in some versions of Microsoft dlls, and if xmm4 or + xmm5 are also used then that would trash the lower bits of ymm4 and + ymm5. If it turns out that vector insns with a vex prefix are used + then we'll need to save ymm0-5 here but that can't be done without + first testing cpuid and xcr0. */ static const char i386_x64_trampoline[] = - "\tsubq $72, %%rsp\n" - "\t.seh_stackalloc 72\n" + "\tsubq $136, %%rsp\n" + "\t.seh_stackalloc 136\n" "\t.seh_endprologue\n" - "\tmovq %%rcx, 64(%%rsp)\n" - "\tmovq %%rdx, 56(%%rsp)\n" - "\tmovq %%r8, 48(%%rsp)\n" - "\tmovq %%r9, 40(%%rsp)\n" - "\tmovq %%rax, %%rdx\n" - "\tleaq __DELAY_IMPORT_DESCRIPTOR_%s(%%rip), %%rcx\n" + "\tmovq %%rcx, 136+8(%%rsp)\n" + "\tmovq %%rdx, 136+16(%%rsp)\n" + "\tmovq %%r8, 136+24(%%rsp)\n" + "\tmovq %%r9, 136+32(%%rsp)\n" + "\tmovaps %%xmm0, 32(%%rsp)\n" + "\tmovaps %%xmm1, 48(%%rsp)\n" + "\tmovaps %%xmm2, 64(%%rsp)\n" + "\tmovaps %%xmm3, 80(%%rsp)\n" + "\tmovaps %%xmm4, 96(%%rsp)\n" + "\tmovaps %%xmm5, 112(%%rsp)\n" + "\tmovq %%rax, %%rdx\n" + "\tleaq __DELAY_IMPORT_DESCRIPTOR_%s(%%rip), %%rcx\n" "\tcall __delayLoadHelper2\n" - "\tmovq 40(%%rsp), %%r9\n" - "\tmovq 48(%%rsp), %%r8\n" - "\tmovq 56(%%rsp), %%rdx\n" - "\tmovq 64(%%rsp), %%rcx\n" - "\taddq $72, %%rsp\n" + "\tmovq 136+8(%%rsp), %%rcx\n" + "\tmovq 136+16(%%rsp), %%rdx\n" + "\tmovq 136+24(%%rsp), %%r8\n" + "\tmovq 136+32(%%rsp), %%r9\n" + "\tmovaps 32(%%rsp), %%xmm0\n" + "\tmovaps 48(%%rsp), %%xmm1\n" + "\tmovaps 64(%%rsp), %%xmm2\n" + "\tmovaps 80(%%rsp), %%xmm3\n" + "\tmovaps 96(%%rsp), %%xmm4\n" + "\tmovaps 112(%%rsp), %%xmm5\n" +
"\taddq $136, %%rsp\n" "\tjmp *%%rax\n"; struct mac