From: Jan Beulich <jbeulich@suse.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Hongtao Liu <hongtao.liu@intel.com>,
Kirill Yukhin <kirill.yukhin@gmail.com>
Subject: [PATCH 4/5] x86: further PR target/100711-like splitting
Date: Wed, 21 Jun 2023 08:27:51 +0200 [thread overview]
Message-ID: <b2607ae7-045a-d1bc-2cc8-d2f114677cb6@suse.com> (raw)
In-Reply-To: <04f99abe-a563-d093-23b7-4abf0f91633d@suse.com>
With respective two-operand bitwise operations now expressible by a
single VPTERNLOG, add splitters to also deal with ior and xor
counterparts of the original and-only case. Note that the splitters need
to be separate, as the placement of "not" differs in the final insns
(*iornot<mode>3, *xnor<mode>3) which are intended to pick up one half of
the result.
gcc/
* config/i386/sse.md: New splitters to simplify
not;vec_duplicate;{ior,xor} as vec_duplicate;{iornot,xnor}.
gcc/testsuite/
* gcc.target/i386/pr100711-4.c: New test.
* gcc.target/i386/pr100711-5.c: New test.
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17366,6 +17366,36 @@
(match_dup 2)))]
"operands[3] = gen_reg_rtx (<MODE>mode);")
+(define_split
+ [(set (match_operand:VI 0 "register_operand")
+ (ior:VI
+ (vec_duplicate:VI
+ (not:<ssescalarmode>
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+ "<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+ [(set (match_dup 3)
+ (vec_duplicate:VI (match_dup 1)))
+ (set (match_dup 0)
+ (ior:VI (not:VI (match_dup 3)) (match_dup 2)))]
+ "operands[3] = gen_reg_rtx (<MODE>mode);")
+
+(define_split
+ [(set (match_operand:VI 0 "register_operand")
+ (xor:VI
+ (vec_duplicate:VI
+ (not:<ssescalarmode>
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+ "<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+ [(set (match_dup 3)
+ (vec_duplicate:VI (match_dup 1)))
+ (set (match_dup 0)
+ (not:VI (xor:VI (match_dup 3) (match_dup 2))))]
+ "operands[3] = gen_reg_rtx (<MODE>mode);")
+
(define_insn "*andnot<mode>3_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b)
+{
+ return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v32hi foo_v32hi (short a, v32hi b)
+{
+ return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v16si foo_v16si (int a, v16si b)
+{
+ return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v8di foo_v8di (long long a, v8di b)
+{
+ return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b)
+{
+ return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v32hi foo_v32hi (short a, v32hi b)
+{
+ return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v16si foo_v16si (int a, v16si b)
+{
+ return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v8di foo_v8di (long long a, v8di b)
+{
+ return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 4 } } */
next prev parent reply other threads:[~2023-06-21 6:27 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-21 6:24 [PATCH 0/5] x86: make better use of VPTERNLOG{D,Q} Jan Beulich
2023-06-21 6:25 ` [PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations Jan Beulich
2023-06-25 4:42 ` Hongtao Liu
2023-06-25 5:52 ` Jan Beulich
2023-06-25 7:13 ` Hongtao Liu
2023-06-25 7:23 ` Hongtao Liu
2023-06-25 7:30 ` Hongtao Liu
2023-06-25 13:35 ` Jan Beulich
2023-06-26 0:42 ` Hongtao Liu
2023-06-21 6:27 ` [PATCH 2/5] x86: use VPTERNLOG also for certain andnot forms Jan Beulich
2023-06-25 4:58 ` Hongtao Liu
2023-06-21 6:27 ` [PATCH 3/5] x86: allow memory operand for AVX2 splitter for PR target/100711 Jan Beulich
2023-06-25 4:58 ` Hongtao Liu
2023-06-21 6:27 ` Jan Beulich [this message]
2023-06-25 5:06 ` [PATCH 4/5] x86: further PR target/100711-like splitting Hongtao Liu
2023-06-25 6:16 ` Jan Beulich
2023-06-25 6:27 ` Hongtao Liu
2023-06-21 6:28 ` [PATCH 5/5] x86: yet more " Jan Beulich
2023-06-25 5:12 ` Hongtao Liu
2023-06-25 6:25 ` Jan Beulich
2023-06-25 6:35 ` Hongtao Liu
2023-06-25 6:41 ` Hongtao Liu
2023-11-06 11:10 ` Jan Beulich
2023-11-06 13:48 ` Hongtao Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b2607ae7-045a-d1bc-2cc8-d2f114677cb6@suse.com \
--to=jbeulich@suse.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongtao.liu@intel.com \
--cc=kirill.yukhin@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).