From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 8702 invoked by alias); 18 Apr 2007 16:03:02 -0000 Received: (qmail 8689 invoked by uid 22791); 18 Apr 2007 16:03:00 -0000 X-Spam-Check-By: sourceware.org Received: from smtp105.sbc.mail.mud.yahoo.com (HELO smtp105.sbc.mail.mud.yahoo.com) (68.142.198.204) by sourceware.org (qpsmtpd/0.31) with SMTP; Wed, 18 Apr 2007 17:02:51 +0100 Received: (qmail 55602 invoked from network); 18 Apr 2007 16:02:49 -0000 Received: from unknown (HELO lucon.org) (hjjean@sbcglobal.net@75.61.81.46 with login) by smtp105.sbc.mail.mud.yahoo.com with SMTP; 18 Apr 2007 16:02:49 -0000 X-YMail-OSG: Le9IUewVM1mvyT.3qmmF1SqXTkMh9S5Lh8x1YRFL8vsXWyE4kmzJ4gXNxCf4BTg1AFHElnwVQa9v_4YUxWNOae7uidZlFRaX.klLQIpxGKFinU0234TBwTmTUKKKkA-- Received: by lucon.org (Postfix, from userid 500) id D89E046EEA9; Wed, 18 Apr 2007 09:02:47 -0700 (PDT) Date: Wed, 18 Apr 2007 17:15:00 -0000 From: "H. J. Lu" To: binutils@sources.redhat.com Subject: PATCH: Add SSE4.2 support Message-ID: <20070418160247.GB10097@lucon.org> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.4.2.2i Mailing-List: contact binutils-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: binutils-owner@sourceware.org X-SW-Source: 2007-04/txt/msg00235.txt.bz2 I am checking in this patch for SSE4.2 support: http://www.intel.com/software/penryn H.J. ---- gas/ 2006-12-22 H.J. Lu * config/tc-i386.c (cpu_arch): Add .sse4.2 and .sse4. (match_template): Handle operand size for crc32 in SSE4.2. (process_suffix): Handle operand type for crc32 in SSE4.2. (output_insn): Support SSE4.2. gas/testsuite/ 2006-12-22 H.J. Lu * gas/i386/i386.exp: Add sse4.2 and x86-64-sse4.2. * gas/i386/sse4_2.d: New file. * gas/i386/sse4_2.s: Likewise. * gas/i386/x86-64-sse4_2.d: Likewise. * gas/i386/x86-64-sse4_2.s: Likewise. opcodes/ 2006-12-22 H.J. Lu * i386-dis.c (CRC32_Fixup): New. 
(PREGRP86, PREGRP87, PREGRP88, PREGRP89, PREGRP90, PREGRP91, PREGRP92): New. (threebyte_0x38_uses_DATA_prefix): Updated for SSE4.2. (threebyte_0x3a_uses_DATA_prefix): Likewise. (prefix_user_table): Add PREGRP86, PREGRP87, PREGRP88, PREGRP89, PREGRP90, PREGRP91 and PREGRP92. (three_byte_table): Likewise. * i386-opc.c (i386_optab): Add SSE4.2 opcodes. * i386-opc.h (CpuSSE4_2): New. (CpuSSE4): Likewise. (CpuUnknownFlags): Add CpuSSE4_2. --- binutils/gas/config/tc-i386.c.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/config/tc-i386.c 2007-04-12 12:53:59.000000000 -0700 @@ -500,6 +500,10 @@ static const arch_entry cpu_arch[] = CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3}, {".sse4.1", PROCESSOR_UNKNOWN, CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1}, + {".sse4.2", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, + {".sse4", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, {".3dnow", PROCESSOR_UNKNOWN, CpuMMX|Cpu3dnow}, {".3dnowa", PROCESSOR_UNKNOWN, @@ -2640,9 +2644,10 @@ match_template (void) || !MATCH (overlap1, i.types[1], operand_types[1]) /* monitor in SSE3 is a very special case. The first register and the second register may have different - sizes. */ + sizes. The same applies to crc32 in SSE4.2. */ || !((t->base_opcode == 0x0f01 && t->extension_opcode == 0xc8) + || t->base_opcode == 0xf20f38f1 || CONSISTENT_REGISTER_MATCH (overlap0, i.types[0], operand_types[0], overlap1, i.types[1], @@ -2829,19 +2834,30 @@ process_suffix (void) { /* We take i.suffix from the last register operand specified, Destination register type is more significant than source - register type. */ - int op; - - for (op = i.operands; --op >= 0;) - if ((i.types[op] & Reg) - && !(i.tm.operand_types[op] & InOutPortReg)) - { - i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : - (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : - (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + register type. 
crc32 in SSE4.2 prefers source register + type. */ + if (i.tm.base_opcode == 0xf20f38f1) + { + if ((i.types[0] & Reg)) + i.suffix = ((i.types[0] & Reg16) ? WORD_MNEM_SUFFIX : LONG_MNEM_SUFFIX); - break; - } + } + + if (!i.suffix) + { + int op; + + for (op = i.operands; --op >= 0;) + if ((i.types[op] & Reg) + && !(i.tm.operand_types[op] & InOutPortReg)) + { + i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : + (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : + (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + LONG_MNEM_SUFFIX); + break; + } + } } else if (i.suffix == BYTE_MNEM_SUFFIX) { @@ -3929,9 +3945,11 @@ output_insn (void) unsigned int prefix; /* All opcodes on i386 have either 1 or 2 bytes. SSSE3 and - SSE4.1 instructions have 3 bytes. We may use one more higher - byte to specify a prefix the instruction requires. */ - if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) + SSE4 instructions have 3 bytes. We may use one more higher + byte to specify a prefix the instruction requires. Exclude + instructions which are in both SSE4 and ABM. */ + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { if (i.tm.base_opcode & 0xff000000) { @@ -3972,7 +3990,8 @@ output_insn (void) } else { - if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { p = frag_more (3); *p++ = (i.tm.base_opcode >> 16) & 0xff; --- binutils/gas/testsuite/gas/i386/i386.exp.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/testsuite/gas/i386/i386.exp 2007-04-12 12:53:59.000000000 -0700 @@ -87,6 +87,7 @@ if [expr ([istarget "i*86-*-*"] || [ist run_dump_test "addr16" run_dump_test "addr32" run_dump_test "sse4_1" + run_dump_test "sse4_2" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. 
@@ -173,6 +174,7 @@ if [expr ([istarget "i*86-*-*"] || [ista run_dump_test "x86-64-nops-1-nocona" run_dump_test "x86-64-nops-1-merom" run_dump_test "x86-64-sse4_1" + run_dump_test "x86-64-sse4_2" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] --- binutils/gas/testsuite/gas/i386/sse4_2.d.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/testsuite/gas/i386/sse4_2.d 2007-04-12 12:53:59.000000000 -0700 @@ -0,0 +1,37 @@ +#objdump: -dw +#name: i386 SSE4.2 + +.*: file format .* + +Disassembly of section .text: + +0+000 : +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 19 crc32b \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 19 crc32w \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: 66 0f 38 37 01 pcmpgtq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 37 c1 pcmpgtq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 01 00 pcmpestri \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 c1 00 pcmpestri \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 01 01 pcmpestrm \$0x1,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 c1 01 pcmpestrm \$0x1,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 01 02 pcmpistri \$0x2,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 c1 02 pcmpistri \$0x2,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 01 03 pcmpistrm \$0x3,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 c1 03 pcmpistrm \$0x3,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%ecx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%ecx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ 
]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +#pass --- binutils/gas/testsuite/gas/i386/sse4_2.s.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/testsuite/gas/i386/sse4_2.s 2007-04-12 12:53:59.000000000 -0700 @@ -0,0 +1,34 @@ +# Streaming SIMD extensions 4.2 Instructions + + .text +foo: + crc32 (%ecx),%ebx + crc32 %cl,%ebx + crc32 %cx,%ebx + crc32 %ecx,%ebx + crc32b (%ecx),%ebx + crc32w (%ecx),%ebx + crc32l (%ecx),%ebx + crc32b %cl,%ebx + crc32w %cx,%ebx + crc32l %ecx,%ebx + pcmpgtq (%ecx),%xmm0 + pcmpgtq %xmm1,%xmm0 + pcmpestri $0x0,(%ecx),%xmm0 + pcmpestri $0x0,%xmm1,%xmm0 + pcmpestrm $0x1,(%ecx),%xmm0 + pcmpestrm $0x1,%xmm1,%xmm0 + pcmpistri $0x2,(%ecx),%xmm0 + pcmpistri $0x2,%xmm1,%xmm0 + pcmpistrm $0x3,(%ecx),%xmm0 + pcmpistrm $0x3,%xmm1,%xmm0 + popcnt (%ecx),%bx + popcnt (%ecx),%ebx + popcntw (%ecx),%bx + popcntl (%ecx),%ebx + popcnt %cx,%bx + popcnt %ecx,%ebx + popcntw %cx,%bx + popcntl %ecx,%ebx + + .p2align 4,0 --- binutils/gas/testsuite/gas/i386/x86-64-sse4_2.d.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/testsuite/gas/i386/x86-64-sse4_2.d 2007-04-12 12:53:59.000000000 -0700 @@ -0,0 +1,47 @@ +#objdump: -dw +#name: x86-64 SSE4.2 + +.*: file format .* + +Disassembly of section .text: + +0+000 : +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 19 crc32q \(%rcx\),%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f0 d9 crc32b %cl,%rbx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 d9 crc32q %rcx,%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 19 crc32b \(%rcx\),%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 19 crc32w \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 19 crc32q \(%rcx\),%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f0 d9 crc32b %cl,%rbx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l 
%ecx,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 d9 crc32q %rcx,%rbx +[ ]*[0-9a-f]+: 66 0f 38 37 01 pcmpgtq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 37 c1 pcmpgtq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 01 00 pcmpestri \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 c1 00 pcmpestri \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 01 01 pcmpestrm \$0x1,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 c1 01 pcmpestrm \$0x1,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 01 02 pcmpistri \$0x2,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 c1 02 pcmpistri \$0x2,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 01 03 pcmpistrm \$0x3,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 c1 03 pcmpistrm \$0x3,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%rcx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%rcx\),%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 19 popcnt \(%rcx\),%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%rcx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%rcx\),%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 19 popcnt \(%rcx\),%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 d9 popcnt %rcx,%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 d9 popcnt %rcx,%rbx +#pass --- binutils/gas/testsuite/gas/i386/x86-64-sse4_2.s.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/gas/testsuite/gas/i386/x86-64-sse4_2.s 2007-04-12 12:53:59.000000000 -0700 @@ -0,0 +1,44 @@ +# Streaming SIMD extensions 4.2 Instructions + + .text +foo: + crc32 (%rcx),%ebx + crc32 (%rcx),%rbx + crc32 %cl,%ebx + crc32 %cl,%rbx + crc32 %cx,%ebx + crc32 %ecx,%ebx + crc32 %rcx,%rbx + crc32b (%rcx),%ebx + crc32w (%rcx),%ebx + crc32l (%rcx),%ebx + crc32q (%rcx),%rbx + crc32b %cl,%ebx + crc32b %cl,%rbx + crc32w %cx,%ebx + crc32l %ecx,%ebx + crc32q %rcx,%rbx + pcmpgtq (%rcx),%xmm0 + pcmpgtq %xmm1,%xmm0 + pcmpestri $0x0,(%rcx),%xmm0 + pcmpestri $0x0,%xmm1,%xmm0 + pcmpestrm $0x1,(%rcx),%xmm0 + pcmpestrm $0x1,%xmm1,%xmm0 + 
pcmpistri $0x2,(%rcx),%xmm0 + pcmpistri $0x2,%xmm1,%xmm0 + pcmpistrm $0x3,(%rcx),%xmm0 + pcmpistrm $0x3,%xmm1,%xmm0 + popcnt (%rcx),%bx + popcnt (%rcx),%ebx + popcnt (%rcx),%rbx + popcntw (%rcx),%bx + popcntl (%rcx),%ebx + popcntq (%rcx),%rbx + popcnt %cx,%bx + popcnt %ecx,%ebx + popcnt %rcx,%rbx + popcntw %cx,%bx + popcntl %ecx,%ebx + popcntq %rcx,%rbx + + .p2align 4,0 --- binutils/opcodes/i386-dis.c.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/opcodes/i386-dis.c 2007-04-12 12:56:19.000000000 -0700 @@ -99,6 +99,7 @@ static void VMX_Fixup (int, int); static void REP_Fixup (int, int); static void CMPXCHG8B_Fixup (int, int); static void XMM_Fixup (int, int); +static void CRC32_Fixup (int, int); struct dis_private { /* Points to first byte not fetched. */ @@ -521,6 +522,13 @@ fetch_data (struct disassemble_info *inf #define PREGRP83 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 83 } } #define PREGRP84 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 84 } } #define PREGRP85 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 85 } } +#define PREGRP86 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 86 } } +#define PREGRP87 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 87 } } +#define PREGRP88 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 88 } } +#define PREGRP89 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 89 } } +#define PREGRP90 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 90 } } +#define PREGRP91 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 91 } } +#define PREGRP92 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 92 } } #define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } } @@ -1287,7 +1295,7 @@ static const unsigned char threebyte_0x3 /* 00 */ 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0, /* 0f */ /* 10 */ 0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0, /* 1f */ /* 20 */ 1,1,1,1,1,1,0,0,1,1,1,1,0,0,0,0, /* 2f */ - /* 30 */ 1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1, /* 3f */ + /* 30 */ 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1, /* 3f */ /* 40 */ 
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */ /* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */ @@ -1323,7 +1331,7 @@ static const unsigned char threebyte_0x3 /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */ /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */ - /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */ + /* f0 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; @@ -2471,6 +2479,62 @@ static const struct dis386 prefix_user_t { "mpsadbw", { XM, EX, Ib } }, { "(bad)", { XX } }, }, + + /* PREGRP86 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpgtq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP87 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "crc32", { Gdq, { CRC32_Fixup, b_mode } } }, + }, + + /* PREGRP88 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "crc32", { Gdq, { CRC32_Fixup, v_mode } } }, + }, + + /* PREGRP89 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpestrm", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP90 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpestri", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP91 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpistrm", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP92 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpistri", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, }; static const struct dis386 x86_64_table[][2] = { @@ -2557,7 +2621,7 @@ static const struct dis386 three_byte_ta { PREGRP57 }, { PREGRP58 }, { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP86 }, /* 38 */ { PREGRP59 }, { PREGRP60 }, @@ -2766,8 +2830,8 @@ static const struct dis386 three_byte_ta { "(bad)", { XX } }, { "(bad)", { XX } }, /* f0 */ - { "(bad)", { XX } }, - { "(bad)", 
{ XX } }, + { PREGRP87 }, + { PREGRP88 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -2895,10 +2959,10 @@ static const struct dis386 three_byte_ta { "(bad)", { XX } }, { "(bad)", { XX } }, /* 60 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP89 }, + { PREGRP90 }, + { PREGRP91 }, + { PREGRP92 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -6247,3 +6311,65 @@ XMM_Fixup (int reg, int sizeflag ATTRIBU sprintf (scratchbuf, "%%xmm%d", reg); oappend (scratchbuf + intel_syntax); } + +static void +CRC32_Fixup (int bytemode, int sizeflag) +{ + /* Add proper suffix to "crc32". */ + char *p = obuf + strlen (obuf); + + switch (bytemode) + { + case b_mode: + *p++ = 'b'; + break; + case v_mode: + USED_REX (REX_W); + if (rex & REX_W) + *p++ = 'q'; + else if ((prefixes & PREFIX_DATA)) + { + *p++ = 'w'; + used_prefixes |= (prefixes & PREFIX_DATA); + } + else + *p++ = 'l'; + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + break; + } + *p = '\0'; + + if (modrm.mod == 3) + { + int add; + + /* Skip mod/rm byte. */ + MODRM_CHECK; + codep++; + + USED_REX (REX_B); + add = (rex & REX_B) ? 
8 : 0; + if (bytemode == b_mode) + { + USED_REX (0); + if (rex) + oappend (names8rex[modrm.rm + add]); + else + oappend (names8[modrm.rm + add]); + } + else + { + USED_REX (REX_W); + if (rex & REX_W) + oappend (names64[modrm.rm + add]); + else if ((prefixes & PREFIX_DATA)) + oappend (names16[modrm.rm + add]); + else + oappend (names32[modrm.rm + add]); + } + } + else + OP_E (v_mode, sizeflag); +} --- binutils/opcodes/i386-opc.c.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/opcodes/i386-opc.c 2007-04-12 12:53:59.000000000 -0700 @@ -1435,6 +1435,18 @@ const template i386_optab[] = {"roundsd", 3, 0x660f3a0b,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, {"roundss", 3, 0x660f3a0a,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LongMem, RegXMM } }, +/* Streaming SIMD extensions 4.2 Instructions. */ + +{"pcmpgtq", 2, 0x660f3837,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pcmpestri", 3, 0x660f3a61,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"pcmpestrm", 3, 0x660f3a60,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"pcmpistri", 3, 0x660f3a63,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"pcmpistrm", 3, 0x660f3a62,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"crc32b", 2, 0xf20f38f0,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Reg8|ByteMem, Reg32|Reg64, 0 } }, +{"crc32", 2, 0xf20f38f0,X, CpuSSE4_2, NoSuf|IgnoreSize|Modrm, { Reg8, Reg32|Reg64, 0 } }, +{"crc32", 2, 0xf20f38f1,X, CpuSSE4_2, wl_Suf|Modrm, { WordReg|WordMem, Reg32, 0 } }, +{"crc32", 2, 0xf20f38f1,X, CpuSSE4_2|Cpu64, q_Suf|IgnoreSize|Modrm|Rex64, { Reg64|LLongMem, Reg64, 0 } }, + /* AMD 3DNow! instructions. 
*/ {"prefetch", 1, 0x0f0d, 0, Cpu3dnow, NoSuf|IgnoreSize|Modrm, { ByteMem, 0, 0 } }, @@ -1497,7 +1509,7 @@ const template i386_optab[] = {"insertq", 4, 0xf20f78, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { Imm8, Imm8, RegXMM, RegXMM} }, /* ABM instructions */ -{"popcnt", 2, 0xf30fb8, X, CpuABM, wlq_Suf|Modrm, { WordReg|WordMem, WordReg, 0} }, +{"popcnt", 2, 0xf30fb8, X, CpuABM|CpuSSE4_2, wlq_Suf|Modrm, { WordReg|WordMem, WordReg, 0} }, {"lzcnt", 2, 0xf30fbd, X, CpuABM, wlq_Suf|Modrm, { WordReg|WordMem, WordReg, 0} }, --- binutils/opcodes/i386-opc.h.sse42 2007-04-12 12:53:59.000000000 -0700 +++ binutils/opcodes/i386-opc.h 2007-04-12 12:53:59.000000000 -0700 @@ -70,6 +70,10 @@ typedef struct template #define CpuSSE4a 0x100000 /* SSE4a New Instuctions required */ #define CpuABM 0x200000 /* ABM New Instructions required */ #define CpuSSE4_1 0x400000 /* SSE4.1 Instructions required */ +#define CpuSSE4_2 0x800000 /* SSE4.2 Instructions required */ + +/* SSE4.1/4.2 Instructions required */ +#define CpuSSE4 (CpuSSE4_1|CpuSSE4_2) /* These flags are set by gas depending on the flag_code. */ #define Cpu64 0x4000000 /* 64bit support required */ @@ -79,7 +83,7 @@ typedef struct template #define CpuUnknownFlags (Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \ |CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuVMX \ |Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuSSSE3|CpuSSE4_1 \ - |CpuABM|CpuSSE4a) + |CpuSSE4_2|CpuABM|CpuSSE4a) /* the bits in opcode_modifier are used to generate the final opcode from the base_opcode. These bits also are used to detect alternate forms of