public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/redhat/heads/gcc-10-branch)] i386: Optimize {, v}{, p}movmsk{b, ps, pd} followed by sign extension [PR91824]
@ 2020-01-30 20:58 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-01-30 20:58 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d37c81f476c17d292943189335d745c3fb817b7d
commit d37c81f476c17d292943189335d745c3fb817b7d
Author: Jakub Jelinek <jakub@redhat.com>
Date: Thu Jan 30 09:41:00 2020 +0100
i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824]
Some time ago, patterns were added to optimize move mask followed by zero
extension from 32 bits to 64 bits. As the testcase shows, the intrinsics
actually return int, not unsigned int, so it will happen quite often that
one actually needs sign extension instead of zero extension. Except for
vpmovmskb with a 256-bit operand, sign vs. zero extension doesn't make a
difference, as we know that bit 31 will not be set (the source will have 2 or
4 doubles, 4 or 8 floats or 16 or 32 chars).
So, for the floating point patterns, this patch just uses a code iterator
so that we handle both zero extend and sign extend, and for the byte one
adds a separate pattern for the 128-bit operand.
2020-01-30 Jakub Jelinek <jakub@redhat.com>
PR target/91824
* config/i386/sse.md
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this. Use
any_extend code iterator instead of always zero_extend.
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
Use any_extend code iterator instead of always zero_extend.
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
Use any_extend code iterator instead of always zero_extend.
(*sse2_pmovmskb_ext): New define_insn.
(*sse2_pmovmskb_ext_lt): New define_insn_and_split.
* gcc.target/i386/pr91824-2.c: New test.
Diff:
---
gcc/ChangeLog | 14 ++++++
gcc/config/i386/sse.md | 55 +++++++++++++++++++----
gcc/testsuite/ChangeLog | 3 ++
gcc/testsuite/gcc.target/i386/pr91824-2.c | 73 +++++++++++++++++++++++++++++++
4 files changed, 137 insertions(+), 8 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c86b9c2..4beb639 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,20 @@
2020-01-30 Jakub Jelinek <jakub@redhat.com>
PR target/91824
+ * config/i386/sse.md
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this. Use
+ any_extend code iterator instead of always zero_extend.
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
+ Use any_extend code iterator instead of always zero_extend.
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
+ (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
+ Use any_extend code iterator instead of always zero_extend.
+ (*sse2_pmovmskb_ext): New define_insn.
+ (*sse2_pmovmskb_ext_lt): New define_insn_and_split.
+
+ PR target/91824
* config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split.
(*popcountsi2_zext_falsedep): New define_insn.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index abbd879..f5ff2e9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15815,9 +15815,9 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
+(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
[(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
+ (any_extend:DI
(unspec:SI
[(match_operand:VF_128_256 1 "register_operand" "x")]
UNSPEC_MOVMSK)))]
@@ -15844,9 +15844,9 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
+ (any_extend:DI
(unspec:SI
[(lt:VF_128_256
(match_operand:<sseintvecmode> 1 "register_operand" "x")
@@ -15856,7 +15856,7 @@
"#"
"&& reload_completed"
[(set (match_dup 0)
- (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
@@ -15880,9 +15880,9 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
[(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
+ (any_extend:DI
(unspec:SI
[(subreg:VF_128_256
(ashiftrt:<sseintvecmode>
@@ -15893,7 +15893,7 @@
"#"
"&& reload_completed"
[(set (match_dup 0)
- (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
@@ -15932,6 +15932,23 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn "*sse2_pmovmskb_ext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2"
+ "%vpmovmskb\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "ssemov")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
@@ -15975,6 +15992,28 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*sse2_pmovmskb_ext_lt"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (unspec:SI
+ [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "const0_operand" "C"))]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ ""
+ [(set_attr "type" "ssemov")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_expand "sse2_maskmovdqu"
[(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9b36606..842e419 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,9 @@
2020-01-30 Jakub Jelinek <jakub@redhat.com>
PR target/91824
+ * gcc.target/i386/pr91824-2.c: New test.
+
+ PR target/91824
* gcc.target/i386/pr91824-1.c: New test.
2020-01-30 Bin Cheng <bin.cheng@linux.alibaba.com>
diff --git a/gcc/testsuite/gcc.target/i386/pr91824-2.c b/gcc/testsuite/gcc.target/i386/pr91824-2.c
new file mode 100644
index 0000000..bdf1295
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91824-2.c
@@ -0,0 +1,73 @@
+/* PR target/91824 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+
+#include <x86intrin.h>
+
+unsigned long long
+f1 (__m128i x)
+{
+ return _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f2 (__m128i x)
+{
+ return (unsigned) _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f3 (__m128 x)
+{
+ return _mm_movemask_ps (x);
+}
+
+unsigned long long
+f4 (__m128 x)
+{
+ return (unsigned) _mm_movemask_ps (x);
+}
+
+unsigned long long
+f5 (__m128d x)
+{
+ return _mm_movemask_pd (x);
+}
+
+unsigned long long
+f6 (__m128d x)
+{
+ return (unsigned) _mm_movemask_pd (x);
+}
+
+unsigned long long
+f7 (__m256 x)
+{
+ return _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f8 (__m256 x)
+{
+ return (unsigned) _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f9 (__m256d x)
+{
+ return _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f10 (__m256d x)
+{
+ return (unsigned) _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f11 (__m256i x)
+{
+ return (unsigned) _mm256_movemask_epi8 (x);
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-01-30 20:58 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-30 20:58 [gcc(refs/vendors/redhat/heads/gcc-10-branch)] i386: Optimize {, v}{, p}movmsk{b, ps, pd} followed by sign extension [PR91824] Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).