From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1386) id 14384388264E; Fri, 28 Jun 2024 06:20:21 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 14384388264E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1719555621; bh=EsOoHsDnWletgy3W+rx5Z7R210hW/A9Z/FmbLtRNXj4=; h=From:To:Subject:Date:From; b=Wd+fiJfMD5tCLsenBYZE8td5XpO62TKGjb3K1+w/Lh4WjY1DCYvdQhai2XwUYW/He e5lP4zG1xCQG8uxsBHnfP2Fu/eJJ2FWGLoOOORD69pwfJXjR4HTE4bMFbLzTEIvJJJ mIVrWGKO9p1SxAQwN0yeXK04+lCGC5SVj5g9rp8Y= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Jan Beulich To: binutils-cvs@sourceware.org Subject: [binutils-gdb] x86/APX: optimize {nf}-form rotate-by-width-less-1 X-Act-Checkin: binutils-gdb X-Git-Author: Jan Beulich X-Git-Refname: refs/heads/master X-Git-Oldrev: 0868b8999bbca960781e7d8bbbc363536193a694 X-Git-Newrev: c7eae03eab750f93b6460e883f25b71d46dd1c47 Message-Id: <20240628062021.14384388264E@sourceware.org> Date: Fri, 28 Jun 2024 06:20:21 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=c7eae03eab750f93b6460e883f25b71d46dd1c47 commit c7eae03eab750f93b6460e883f25b71d46dd1c47 Author: Jan Beulich Date: Fri Jun 28 08:19:32 2024 +0200 x86/APX: optimize {nf}-form rotate-by-width-less-1 Unlike for the legacy forms, where there's a difference in the resulting EFLAGS.CF, for the NF variants the immediate can be got rid of in that case by switching to a 1-bit rotate in the opposite direction. 
Diff: --- gas/config/tc-i386.c | 22 ++++++++++++++++- gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d | 32 +++++++++++++++++++++= ++++ gas/testsuite/gas/i386/x86-64-apx-nf.s | 20 ++++++++++++++++ opcodes/i386-opc.tbl | 22 ++++++++--------- opcodes/i386-tbl.h | 8 +++---- 5 files changed, 88 insertions(+), 16 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index c51402a9ced..7ffacf7a769 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4927,6 +4927,7 @@ optimize_encoding (void) } else if (!optimize_for_space && i.tm.base_opcode =3D=3D 0xd0 + && i.tm.extension_opcode =3D=3D 4 && (i.tm.opcode_space =3D=3D SPACE_BASE || i.tm.opcode_space =3D=3D SPACE_EVEXMAP4) && !i.mem_operands) @@ -4942,7 +4943,6 @@ optimize_encoding (void) shll $1, %rN, %rM -> addl %rN, %rN, %rM shlq $1, %rN, %rM -> addq %rN, %rN, %rM */ - gas_assert (i.tm.extension_opcode =3D=3D 4); i.tm.base_opcode =3D 0x00; i.tm.extension_opcode =3D None; if (i.operands >=3D 2) @@ -5403,6 +5403,26 @@ optimize_nf_encoding (void) i.imm_operands =3D 0; --i.operands; } + else if (i.tm.base_opcode =3D=3D 0xc0 + && i.op[0].imms->X_op =3D=3D O_constant + && i.op[0].imms->X_add_number + =3D=3D (i.types[i.operands - 1].bitfield.byte + || i.suffix =3D=3D BYTE_MNEM_SUFFIX + ? 7 : i.types[i.operands - 1].bitfield.word + || i.suffix =3D=3D WORD_MNEM_SUFFIX + ? 15 : 63 >> (i.types[i.operands - 1].bitfield.dword + || i.suffix =3D=3D LONG_MNEM_SUFFIX))) + { + /* Optimize: -O: + {nf} rol $osz-1, ... -> {nf} ror $1, ... + {nf} ror $osz-1, ... -> {nf} rol $1, ... 
+ */ + gas_assert (i.tm.extension_opcode <=3D 1); + i.tm.extension_opcode ^=3D 1; + i.tm.base_opcode =3D 0xd0; + i.tm.operand_types[0].bitfield.imm1 =3D 1; + i.imm_operands =3D 0; + } } =20 static void diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d b/gas/testsuit= e/gas/i386/x86-64-apx-nf-optimize.d index ac05943386f..10561692da3 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d +++ b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d @@ -1480,4 +1480,36 @@ Disassembly of section \.text: [ ]*[a-f0-9]+:[ ]*62 f4 74 1c ff 00[ ]+\{nf\} inc \(%rax\),%ecx [ ]*[a-f0-9]+:[ ]*62 f4 fc 0c ff 00[ ]+\{nf\} incq \(%rax\) [ ]*[a-f0-9]+:[ ]*62 f4 b4 1c ff 00[ ]+\{nf\} inc \(%rax\),%r9 +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d0 ca[ ]+\{nf\} ror \$1,%dl +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d0 ca[ ]+\{nf\} ror \$1,%dl,%al +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c d1 ca[ ]+\{nf\} ror \$1,%dx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c d1 ca[ ]+\{nf\} ror \$1,%dx,%ax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d1 ca[ ]+\{nf\} ror \$1,%edx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d1 ca[ ]+\{nf\} ror \$1,%edx,%eax +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c d1 ca[ ]+\{nf\} ror \$1,%rdx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 1c d1 ca[ ]+\{nf\} ror \$1,%rdx,%rax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d0 0a[ ]+\{nf\} rorb \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d0 0a[ ]+\{nf\} ror \$1,\(%rdx\),%al +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c d1 0a[ ]+\{nf\} rorw \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c d1 0a[ ]+\{nf\} ror \$1,\(%rdx\),%ax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d1 0a[ ]+\{nf\} rorl \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d1 0a[ ]+\{nf\} ror \$1,\(%rdx\),%eax +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c d1 0a[ ]+\{nf\} rorq \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 fc 1c d1 0a[ ]+\{nf\} ror \$1,\(%rdx\),%rax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d0 c2[ ]+\{nf\} rol \$1,%dl +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d0 c2[ ]+\{nf\} rol \$1,%dl,%al +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c d1 c2[ ]+\{nf\} rol \$1,%dx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c d1 c2[ ]+\{nf\} rol \$1,%dx,%ax +[ ]*[a-f0-9]+:[ ]*62 f4 
7c 0c d1 c2[ ]+\{nf\} rol \$1,%edx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d1 c2[ ]+\{nf\} rol \$1,%edx,%eax +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c d1 c2[ ]+\{nf\} rol \$1,%rdx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 1c d1 c2[ ]+\{nf\} rol \$1,%rdx,%rax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d0 02[ ]+\{nf\} rolb \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d0 02[ ]+\{nf\} rol \$1,\(%rdx\),%al +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c d1 02[ ]+\{nf\} rolw \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c d1 02[ ]+\{nf\} rol \$1,\(%rdx\),%ax +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c d1 02[ ]+\{nf\} roll \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 7c 1c d1 02[ ]+\{nf\} rol \$1,\(%rdx\),%eax +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c d1 02[ ]+\{nf\} rolq \$1,\(%rdx\) +[ ]*[a-f0-9]+:[ ]*62 f4 fc 1c d1 02[ ]+\{nf\} rol \$1,\(%rdx\),%rax #pass diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf.s b/gas/testsuite/gas/i38= 6/x86-64-apx-nf.s index acb4ba639a0..14f65267317 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-nf.s +++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s @@ -1433,3 +1433,23 @@ optimize: {nf} \op\()q $-1, (%rax) {nf} \op $-1, (%rax), %r9 .endr + + .irp dir, l, r + {nf} ro\dir $7, %dl + {nf} ro\dir $7, %dl, %al + {nf} ro\dir $15, %dx + {nf} ro\dir $15, %dx, %ax + {nf} ro\dir $31, %edx + {nf} ro\dir $31, %edx, %eax + {nf} ro\dir $63, %rdx + {nf} ro\dir $63, %rdx, %rax + + {nf} ro\dir\()b $7, (%rdx) + {nf} ro\dir $7, (%rdx), %al + {nf} ro\dir\()w $15, (%rdx) + {nf} ro\dir $15, (%rdx), %ax + {nf} ro\dir\()l $31, (%rdx) + {nf} ro\dir $31, (%rdx), %eax + {nf} ro\dir\()q $63, (%rdx) + {nf} ro\dir $63, (%rdx), %rax + .endr diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 2715c5eb29c..bf4ecdb5a4a 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -446,22 +446,22 @@ imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|= EVexMap4|NF/*|ZU*/, { Imm16 =20
=20 - + =20 , 0xd0/, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexM= ap4||, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex= , Reg8|Reg16|Reg32|Reg64 } , 0xd0/, 0, W|Modrm|No_sSuf|, { Imm1, Reg8|Reg16|Reg3= 2|Reg64|Unspecified|BaseIndex } , 0xd0/, APX_F, W|Modrm|No_sSuf|EVexMap4||, { = Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } -, 0xc0/, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexM= ap4|, { , Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg= 8|Reg16|Reg32|Reg64 } +, 0xc0/, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexM= ap4||, { , Reg8|Reg16|Reg32|Reg64|Unspecified|Base= Index, Reg8|Reg16|Reg32|Reg64 } , 0xc0/, i186, W|Modrm|No_sSuf, { , Reg8|Reg16|Reg32|= Reg64|Unspecified|BaseIndex } -, 0xc0/, APX_F, W|Modrm|No_sSuf|EVexMap4|, { ,= Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } +, 0xc0/, APX_F, W|Modrm|No_sSuf|EVexMap4||, { = , Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } , 0xd2/, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexM= ap4|, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Re= g8|Reg16|Reg32|Reg64 } , 0xd2/, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Re= g64|Unspecified|BaseIndex } , 0xd2/, APX_F, W|Modrm|No_sSuf|EVexMap4|, { ShiftCount= , Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 17cf8dc4b4b..c69ecb1adf0 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -3652,7 +3652,7 @@ static const insn_template i386_optab[] =3D 0, 0, 0, 0, 1, 0 } } } }, { MN_rol, 0xc0, 3, SPACE_EVEXMAP4, 0, { 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3674,7 +3674,7 @@ static const insn_template i386_optab[] =3D 0, 0, 0, 0, 1, 0 } } } }, { MN_rol, 0xc0, 2, SPACE_EVEXMAP4, 0, { 0, 1, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3756,7 +3756,7 @@ static const insn_template i386_optab[] =3D 0, 0, 0, 0, 1, 0 } } } }, { MN_ror, 0xc0, 3, SPACE_EVEXMAP4, 1, { 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3778,7 +3778,7 @@ static const insn_template i386_optab[] =3D 0, 0, 0, 0, 1, 0 } } } }, { MN_ror, 0xc0, 2, SPACE_EVEXMAP4, 1, { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },