From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 7F7953857811; Wed, 15 Sep 2021 09:26:39 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7F7953857811 From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/102327] gcc/config/i386/i386-expand.c:14678: Suspicious coding ? Date: Wed, 15 Sep 2021 09:26:39 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 12.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: NEW X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: 12.0 X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Sep 2021 09:26:39 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102327 --- Comment #3 from Hongtao.liu --- Here's the optimization I did for v8hf/v16hf/v32hf vec_set/init/extract. 
diff --git a/origin.s b/after.s index e43e09c..5ea1fb6 100644 --- a/origin.s +++ b/after.s @@ -6,25 +6,13 @@ vec_init_v8hf: .LFB0: .cfi_startproc - vmovw %xmm0, %eax - vmovw %xmm2, %esi - vmovw %xmm4, %edx - vmovw %xmm6, %ecx - vmovd %eax, %xmm0 - vmovw %xmm1, %eax - vmovd %esi, %xmm1 - vmovd %ecx, %xmm2 - vpinsrw $1, %eax, %xmm0, %xmm0 - vmovw %xmm3, %eax - vpinsrw $1, %eax, %xmm1, %xmm3 - vmovw %xmm5, %eax - vmovd %edx, %xmm1 - vpinsrw $1, %eax, %xmm1, %xmm1 - vmovw %xmm7, %eax - vpunpckldq %xmm3, %xmm0, %xmm0 - vpinsrw $1, %eax, %xmm2, %xmm2 - vpunpckldq %xmm2, %xmm1, %xmm1 - vpunpcklqdq %xmm1, %xmm0, %xmm0 + vpunpcklwd %xmm1, %xmm0, %xmm0 + vpunpcklwd %xmm3, %xmm2, %xmm2 + vpunpcklwd %xmm5, %xmm4, %xmm4 + vpunpcklwd %xmm7, %xmm6, %xmm6 + vpunpckldq %xmm2, %xmm0, %xmm0 + vpunpckldq %xmm6, %xmm4, %xmm4 + vpunpcklqdq %xmm4, %xmm0, %xmm0 ret .cfi_endproc .LFE0: @@ -35,8 +23,7 @@ vec_init_v8hf: vec_extract_v8hf_4: .LFB1: .cfi_startproc - vpextrw $4, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $8, %xmm0, %xmm0 ret .cfi_endproc .LFE1: @@ -47,8 +34,7 @@ vec_extract_v8hf_4: vec_extract_v16hf_3: .LFB2: .cfi_startproc - vpextrw $3, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $6, %xmm0, %xmm0 ret .cfi_endproc .LFE2: @@ -71,8 +57,7 @@ vec_extract_v16hf_15: .LFB4: .cfi_startproc vextracti128 $0x1, %ymm0, %xmm0 - vpextrw $7, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $14, %xmm0, %xmm0 ret .cfi_endproc .LFE4: @@ -83,8 +68,7 @@ vec_extract_v16hf_15: vec_extract_v32hf_5: .LFB5: .cfi_startproc - vpextrw $5, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $10, %xmm0, %xmm0 ret .cfi_endproc .LFE5: @@ -107,8 +91,7 @@ vec_extract_v32hf_14: .LFB7: .cfi_startproc vextracti128 $0x1, %ymm0, %xmm0 - vpextrw $6, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $12, %xmm0, %xmm0 ret .cfi_endproc .LFE7: @@ -144,8 +127,7 @@ vec_extract_v32hf_28: .cfi_startproc vextracti64x4 $0x1, %zmm0, %ymm0 vextracti128 $0x1, %ymm0, %xmm0 - vpextrw $4, %xmm0, %eax - vmovw %eax, %xmm0 + vpsrldq $8, %xmm0, %xmm0 ret 
.cfi_endproc .LFE10: @@ -156,8 +138,8 @@ vec_extract_v32hf_28: vec_set_v8hf_4: .LFB11: .cfi_startproc - vmovw %xmm1, %eax - vpinsrw $4, %eax, %xmm0, %xmm0 + vpbroadcastw %xmm1, %xmm1 + vpblendw $16, %xmm1, %xmm0, %xmm0 ret .cfi_endproc .LFE11: @@ -168,9 +150,9 @@ vec_set_v8hf_4: vec_set_v16hf_3: .LFB12: .cfi_startproc - vmovw %xmm1, %eax - vpinsrw $3, %eax, %xmm0, %xmm2 - vinserti128 $0x0, %xmm2, %ymm0, %ymm0 + vpbroadcastw %xmm1, %ymm1 + vpblendw $8, %ymm1, %ymm0, %ymm1 + vpblendd $15, %ymm1, %ymm0, %ymm0 ret .cfi_endproc .LFE12: @@ -181,9 +163,9 @@ vec_set_v16hf_3: vec_set_v16hf_8: .LFB13: .cfi_startproc - vextracti128 $0x1, %ymm0, %xmm2 - vmovsh %xmm1, %xmm2, %xmm2 - vinserti128 $0x1, %xmm2, %ymm0, %ymm0 + vpbroadcastw %xmm1, %ymm1 + vpblendw $1, %ymm1, %ymm0, %ymm1 + vpblendd $240, %ymm1, %ymm0, %ymm0 ret .cfi_endproc .LFE13: @@ -194,10 +176,9 @@ vec_set_v16hf_8: vec_set_v16hf_15: .LFB14: .cfi_startproc - vextracti128 $0x1, %ymm0, %xmm2 - vmovw %xmm1, %eax - vpinsrw $7, %eax, %xmm2, %xmm2 - vinserti128 $0x1, %xmm2, %ymm0, %ymm0 + vpbroadcastw %xmm1, %ymm1 + vpblendw $128, %ymm1, %ymm0, %ymm1 + vpblendd $240, %ymm1, %ymm0, %ymm0 ret .cfi_endproc .LFE14: @@ -208,7 +189,8 @@ vec_set_v16hf_15: vec_set_v32hf_5: .LFB15: .cfi_startproc - kmovd .LC0(%rip), %k1 + movl $32, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc @@ -220,7 +202,8 @@ vec_set_v32hf_5: vec_set_v32hf_8: .LFB16: .cfi_startproc - kmovd .LC1(%rip), %k1 + movl $256, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc @@ -232,7 +215,8 @@ vec_set_v32hf_8: vec_set_v32hf_14: .LFB17: .cfi_startproc - kmovd .LC2(%rip), %k1 + movl $16384, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc @@ -244,7 +228,8 @@ vec_set_v32hf_14: vec_set_v32hf_16: .LFB18: .cfi_startproc - kmovd .LC3(%rip), %k1 + movl $65536, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc @@ -256,7 +241,8 @@ vec_set_v32hf_16: vec_set_v32hf_24: .LFB19: 
.cfi_startproc - kmovd .LC4(%rip), %k1 + movl $16777216, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc @@ -268,30 +254,12 @@ vec_set_v32hf_24: vec_set_v32hf_28: .LFB20: .cfi_startproc - kmovd .LC5(%rip), %k1 + movl $268435456, %eax + kmovd %eax, %k1 vpbroadcastw %xmm1, %zmm0{%k1} ret .cfi_endproc .LFE20: .size vec_set_v32hf_28, .-vec_set_v32hf_28 - .section .rodata.cst4,"aM",@progbits,4 - .align 4 -.LC0: - .long 32 - .align 4 -.LC1: - .long 256 - .align 4 -.LC2: - .long 16384 - .align 4 -.LC3: - .long 65536 - .align 4 -.LC4: - .long 16777216 - .align 4 -.LC5: - .long 268435456 .ident "GCC: (GNU) 12.0.0 20210913 (experimental)" .section .note.GNU-stack,"",@progbits