public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "crazylht at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/102327] gcc/config/i386/i386-expand.c:14678: Suspicious coding ?
Date: Wed, 15 Sep 2021 09:26:39 +0000	[thread overview]
Message-ID: <bug-102327-4-ItB8zVS7I8@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-102327-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102327

--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
Here's the optimization I did for v8hf/v16hf/v32hf vec_set/vec_init/vec_extract (generated assembly before vs. after):

diff --git a/origin.s b/after.s
index e43e09c..5ea1fb6 100644
--- a/origin.s
+++ b/after.s
@@ -6,25 +6,13 @@
 vec_init_v8hf:
 .LFB0:
        .cfi_startproc
-       vmovw   %xmm0, %eax
-       vmovw   %xmm2, %esi
-       vmovw   %xmm4, %edx
-       vmovw   %xmm6, %ecx
-       vmovd   %eax, %xmm0
-       vmovw   %xmm1, %eax
-       vmovd   %esi, %xmm1
-       vmovd   %ecx, %xmm2
-       vpinsrw $1, %eax, %xmm0, %xmm0
-       vmovw   %xmm3, %eax
-       vpinsrw $1, %eax, %xmm1, %xmm3
-       vmovw   %xmm5, %eax
-       vmovd   %edx, %xmm1
-       vpinsrw $1, %eax, %xmm1, %xmm1
-       vmovw   %xmm7, %eax
-       vpunpckldq      %xmm3, %xmm0, %xmm0
-       vpinsrw $1, %eax, %xmm2, %xmm2
-       vpunpckldq      %xmm2, %xmm1, %xmm1
-       vpunpcklqdq     %xmm1, %xmm0, %xmm0
+       vpunpcklwd      %xmm1, %xmm0, %xmm0
+       vpunpcklwd      %xmm3, %xmm2, %xmm2
+       vpunpcklwd      %xmm5, %xmm4, %xmm4
+       vpunpcklwd      %xmm7, %xmm6, %xmm6
+       vpunpckldq      %xmm2, %xmm0, %xmm0
+       vpunpckldq      %xmm6, %xmm4, %xmm4
+       vpunpcklqdq     %xmm4, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE0:
@@ -35,8 +23,7 @@ vec_init_v8hf:
 vec_extract_v8hf_4:
 .LFB1:
        .cfi_startproc
-       vpextrw $4, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $8, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE1:
@@ -47,8 +34,7 @@ vec_extract_v8hf_4:
 vec_extract_v16hf_3:
 .LFB2:
        .cfi_startproc
-       vpextrw $3, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $6, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE2:
@@ -71,8 +57,7 @@ vec_extract_v16hf_15:
 .LFB4:
        .cfi_startproc
        vextracti128    $0x1, %ymm0, %xmm0
-       vpextrw $7, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $14, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE4:
@@ -83,8 +68,7 @@ vec_extract_v16hf_15:
 vec_extract_v32hf_5:
 .LFB5:
        .cfi_startproc
-       vpextrw $5, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $10, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE5:
@@ -107,8 +91,7 @@ vec_extract_v32hf_14:
 .LFB7:
        .cfi_startproc
        vextracti128    $0x1, %ymm0, %xmm0
-       vpextrw $6, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $12, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE7:
@@ -144,8 +127,7 @@ vec_extract_v32hf_28:
        .cfi_startproc
        vextracti64x4   $0x1, %zmm0, %ymm0
        vextracti128    $0x1, %ymm0, %xmm0
-       vpextrw $4, %xmm0, %eax
-       vmovw   %eax, %xmm0
+       vpsrldq $8, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE10:
@@ -156,8 +138,8 @@ vec_extract_v32hf_28:
 vec_set_v8hf_4:
 .LFB11:
        .cfi_startproc
-       vmovw   %xmm1, %eax
-       vpinsrw $4, %eax, %xmm0, %xmm0
+       vpbroadcastw    %xmm1, %xmm1
+       vpblendw        $16, %xmm1, %xmm0, %xmm0
        ret
        .cfi_endproc
 .LFE11:
@@ -168,9 +150,9 @@ vec_set_v8hf_4:
 vec_set_v16hf_3:
 .LFB12:
        .cfi_startproc
-       vmovw   %xmm1, %eax
-       vpinsrw $3, %eax, %xmm0, %xmm2
-       vinserti128     $0x0, %xmm2, %ymm0, %ymm0
+       vpbroadcastw    %xmm1, %ymm1
+       vpblendw        $8, %ymm1, %ymm0, %ymm1
+       vpblendd        $15, %ymm1, %ymm0, %ymm0
        ret
        .cfi_endproc
 .LFE12:
@@ -181,9 +163,9 @@ vec_set_v16hf_3:
 vec_set_v16hf_8:
 .LFB13:
        .cfi_startproc
-       vextracti128    $0x1, %ymm0, %xmm2
-       vmovsh  %xmm1, %xmm2, %xmm2
-       vinserti128     $0x1, %xmm2, %ymm0, %ymm0
+       vpbroadcastw    %xmm1, %ymm1
+       vpblendw        $1, %ymm1, %ymm0, %ymm1
+       vpblendd        $240, %ymm1, %ymm0, %ymm0
        ret
        .cfi_endproc
 .LFE13:
@@ -194,10 +176,9 @@ vec_set_v16hf_8:
 vec_set_v16hf_15:
 .LFB14:
        .cfi_startproc
-       vextracti128    $0x1, %ymm0, %xmm2
-       vmovw   %xmm1, %eax
-       vpinsrw $7, %eax, %xmm2, %xmm2
-       vinserti128     $0x1, %xmm2, %ymm0, %ymm0
+       vpbroadcastw    %xmm1, %ymm1
+       vpblendw        $128, %ymm1, %ymm0, %ymm1
+       vpblendd        $240, %ymm1, %ymm0, %ymm0
        ret
        .cfi_endproc
 .LFE14:
@@ -208,7 +189,8 @@ vec_set_v16hf_15:
 vec_set_v32hf_5:
 .LFB15:
        .cfi_startproc
-       kmovd   .LC0(%rip), %k1
+       movl    $32, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
@@ -220,7 +202,8 @@ vec_set_v32hf_5:
 vec_set_v32hf_8:
 .LFB16:
        .cfi_startproc
-       kmovd   .LC1(%rip), %k1
+       movl    $256, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
@@ -232,7 +215,8 @@ vec_set_v32hf_8:
 vec_set_v32hf_14:
 .LFB17:
        .cfi_startproc
-       kmovd   .LC2(%rip), %k1
+       movl    $16384, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
@@ -244,7 +228,8 @@ vec_set_v32hf_14:
 vec_set_v32hf_16:
 .LFB18:
        .cfi_startproc
-       kmovd   .LC3(%rip), %k1
+       movl    $65536, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
@@ -256,7 +241,8 @@ vec_set_v32hf_16:
 vec_set_v32hf_24:
 .LFB19:
        .cfi_startproc
-       kmovd   .LC4(%rip), %k1
+       movl    $16777216, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
@@ -268,30 +254,12 @@ vec_set_v32hf_24:
 vec_set_v32hf_28:
 .LFB20:
        .cfi_startproc
-       kmovd   .LC5(%rip), %k1
+       movl    $268435456, %eax
+       kmovd   %eax, %k1
        vpbroadcastw    %xmm1, %zmm0{%k1}
        ret
        .cfi_endproc
 .LFE20:
        .size   vec_set_v32hf_28, .-vec_set_v32hf_28
-       .section        .rodata.cst4,"aM",@progbits,4
-       .align 4
-.LC0:
-       .long   32
-       .align 4
-.LC1:
-       .long   256
-       .align 4
-.LC2:
-       .long   16384
-       .align 4
-.LC3:
-       .long   65536
-       .align 4
-.LC4:
-       .long   16777216
-       .align 4
-.LC5:
-       .long   268435456
        .ident  "GCC: (GNU) 12.0.0 20210913 (experimental)"
        .section        .note.GNU-stack,"",@progbits

  parent reply	other threads:[~2021-09-15  9:26 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-14 15:00 [Bug target/102327] New: " dcb314 at hotmail dot com
2021-09-14 15:01 ` [Bug target/102327] " dcb314 at hotmail dot com
2021-09-14 16:05 ` hjl.tools at gmail dot com
2021-09-15  1:10 ` crazylht at gmail dot com
2021-09-15  9:26 ` crazylht at gmail dot com [this message]
2021-09-15 10:49 ` cvs-commit at gcc dot gnu.org
2021-09-15 10:49 ` crazylht at gmail dot com
2021-09-15 20:39 ` pinskia at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-102327-4-ItB8zVS7I8@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).