From: Uros Bizjak <ubizjak@gmail.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH] i386: Introduce V2QImode vectorized logic [PR103861]
Date: Tue, 4 Jan 2022 19:46:50 +0100 [thread overview]
Message-ID: <CAFULd4bZTu5miELBtvwGWWSZX9z-xEvs4bCuC9ySD0C5VXYDNw@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 845 bytes --]
Add V2QImode logic operations with SSE and GP registers and split
them to V4QImode SSE instructions or SImode GP instructions.
The patch also fixes PR target/103900.
2022-01-04 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/103861
* config/i386/mmx.md (one_cmplv2qi2): New insn pattern.
(one_cmplv2qi2 splitters): New post-reload splitters.
(*andnotv2qi3): New insn pattern.
(andnotv2qi3 splitters): New post-reload splitters.
(<any_logic:code>v2qi3): New insn pattern.
(<any_logic:code>v2qi3 splitters): New post-reload splitters.
gcc/testsuite/ChangeLog:
PR target/103861
* gcc.target/i386/warn-vect-op-2.c: Adjust warnings.
* gcc.target/i386/pr103900.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 7244 bytes --]
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5b33d3cfc1c..fc8ec5e4d49 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2745,6 +2745,45 @@
"TARGET_SSE2"
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
+(define_insn "one_cmplv2qi2"
+ [(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v")
+ (not:V2QI
+ (match_operand:V2QI 1 "register_operand" "0,x,v")))]
+ ""
+ "#"
+ [(set_attr "isa" "*,sse2,avx512vl")
+ (set_attr "type" "negnot,sselog,sselog")
+ (set_attr "mode" "SI,TI,TI")])
+
+(define_split
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (not:V2QI
+ (match_operand:V2QI 1 "general_reg_operand")))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))]
+{
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (not:V2QI
+ (match_operand:V2QI 1 "sse_reg_operand")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0)
+ (xor:V4QI
+ (match_dup 0) (match_dup 1)))]
+{
+ emit_insn
+ (gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])),
+ CONSTM1_RTX (V16QImode)));
+
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
(define_insn "mmx_andnot<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
@@ -2775,6 +2814,69 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
+(define_insn "*andnotv2qi3"
+ [(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v"))
+ (match_operand:V2QI 2 "register_operand" "r,r,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
+ (set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
+ (set_attr "mode" "SI,SI,TI,TI,TI")])
+
+(define_split
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI && reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (and:SI (not:SI (match_dup 1)) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_BMI && reload_completed"
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))
+ (parallel
+ [(set (match_dup 0)
+ (and:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "sse_reg_operand"))
+ (match_operand:V2QI 2 "sse_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0)
+ (and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (V4QImode, operands[2]);
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
@@ -2821,6 +2923,50 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
+(define_insn "<code>v2qi3"
+ [(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v")
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "register_operand" "%0,0,x,v")
+ (match_operand:V2QI 2 "register_operand" "r,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "type" "alu,sselog,sselog,sselog")
+ (set_attr "mode" "SI,TI,TI,TI")])
+
+(define_split
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "general_reg_operand")
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (any_logic:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "sse_reg_operand")
+ (match_operand:V2QI 2 "sse_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0)
+ (any_logic:V4QI (match_dup 1) (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (V4QImode, operands[2]);
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
diff --git a/gcc/testsuite/gcc.target/i386/pr103900.c b/gcc/testsuite/gcc.target/i386/pr103900.c
new file mode 100644
index 00000000000..8793b492a05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103900.c
@@ -0,0 +1,25 @@
+/* PR target/103900 */
+/* { dg-do compile } */
+/* { dg-options "-O -fno-tree-dce -fno-tree-dse" } */
+
+typedef unsigned char __attribute__((__vector_size__(2))) T;
+typedef unsigned char __attribute__((__vector_size__(32))) U;
+typedef int __attribute__((__vector_size__(64))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+T foo0_v128u8_0;
+U foo0_v256u8_0;
+T foo0_v16u16_0;
+int foo0_v128u64_0, foo0_v512u64_0;
+
+void
+foo0() {
+ V v512u128_0;
+ T v16u8_0;
+ foo0_v128u64_0 += (short)v16u8_0;
+ T v16u8_1 = ~__builtin_shufflevector(foo0_v128u8_0, foo0_v256u8_0, 0, 5);
+ W v256u128_1;
+ V v512u8_r =
+ foo0_v512u64_0 + v512u128_0;
+ (union {U b;}){}.b + (U)v256u128_1;
+ T v16u8_r = v16u8_0 + v16u8_1 + foo0_v16u16_0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
index 5e378b6bd04..4560f7070bb 100644
--- a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
+++ b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
@@ -14,7 +14,7 @@ int main (int argc, char *argv[])
v0 + v1, /* { dg-warning "expanded piecewise" } */
v0 - v1, /* { dg-warning "expanded piecewise" } */
v0 > v1, /* { dg-warning "expanded piecewise" } */
- v0 & v1, /* { dg-warning "expanded in parallel" } */
+ v0 & v1, /* { dg-warning "expanded piecewise" } */
__builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */
__builtin_shuffle (v0, v1, v1) /* { dg-warning "expanded piecewise" } */
};
reply other threads:[~2022-01-04 18:47 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAFULd4bZTu5miELBtvwGWWSZX9z-xEvs4bCuC9ySD0C5VXYDNw@mail.gmail.com \
--to=ubizjak@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).