From: Jan Beulich <jbeulich@suse.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Hongtao Liu <hongtao.liu@intel.com>,
Kirill Yukhin <kirill.yukhin@gmail.com>
Subject: [PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations
Date: Wed, 21 Jun 2023 08:25:52 +0200 [thread overview]
Message-ID: <457ffad0-9ecd-3e19-f5ab-6153ce4b8bad@suse.com> (raw)
In-Reply-To: <04f99abe-a563-d093-23b7-4abf0f91633d@suse.com>
All combinations of and, ior, xor, and not involving two operands can be
expressed with VPTERNLOG in a single insn.
gcc/
PR target/93768
* config/i386/i386.cc (ix86_rtx_costs): Further special-case
bitwise vector operations.
* config/i386/sse.md (*iornot<mode>3): New insn.
(*xnor<mode>3): Likewise.
(*<nlogic><mode>3): Likewise.
(andor): New code iterator.
(nlogic): New code attribute.
(ternlog_nlogic): Likewise.
gcc/testsuite/
PR target/93768
* gcc.target/i386/avx512-binop-not-1.h: New.
* gcc.target/i386/avx512-binop-not-2.h: New.
* gcc.target/i386/avx512f-orn-si-zmm-1.c: New test.
* gcc.target/i386/avx512f-orn-si-zmm-2.c: New test.
---
The use of VI matches that in e.g. one_cmpl<mode>2 /
<mask_codefor>one_cmpl<mode>2<mask_name> and *andnot<mode>3, despite
(here and there)
- V64QI and V32HI being needlessly excluded when AVX512BW isn't enabled,
- V<n>TI not being covered,
- vector modes narrower than 16 bytes not being covered.
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21178,6 +21178,32 @@ ix86_rtx_costs (rtx x, machine_mode mode
return false;
case IOR:
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ /* (ior (not ...) ...) can be a single insn in AVX512. */
+ if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16))))
+ {
+ rtx right = GET_CODE (XEXP (x, 1)) != NOT
+ ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
+
+ *total = ix86_vec_cost (mode, cost->sse_op)
+ + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+ outer_code, opno, speed)
+ + rtx_cost (right, mode, outer_code, opno, speed);
+ return true;
+ }
+ *total = ix86_vec_cost (mode, cost->sse_op);
+ }
+ else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ *total = cost->add * 2;
+ else
+ *total = cost->add;
+ return false;
+
case XOR:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
*total = ix86_vec_cost (mode, cost->sse_op);
@@ -21198,11 +21224,20 @@ ix86_rtx_costs (rtx x, machine_mode mode
/* pandn is a single instruction. */
if (GET_CODE (XEXP (x, 0)) == NOT)
{
+ rtx right = XEXP (x, 1);
+
+ /* (and (not ...) (not ...)) can be a single insn in AVX512. */
+ if (GET_CODE (right) == NOT && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16))))
+ right = XEXP (right, 0);
+
*total = ix86_vec_cost (mode, cost->sse_op)
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
outer_code, opno, speed)
- + rtx_cost (XEXP (x, 1), mode,
- outer_code, opno, speed);
+ + rtx_cost (right, mode, outer_code, opno, speed);
return true;
}
else if (GET_CODE (XEXP (x, 1)) == NOT)
@@ -21260,8 +21295,25 @@ ix86_rtx_costs (rtx x, machine_mode mode
case NOT:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- // vnot is pxor -1.
- *total = ix86_vec_cost (mode, cost->sse_op) + 1;
+ {
+ /* (not (xor ...)) can be a single insn in AVX512. */
+ if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16))))
+ {
+ *total = ix86_vec_cost (mode, cost->sse_op)
+ + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+ outer_code, opno, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
+ outer_code, opno, speed);
+ return true;
+ }
+
+ // vnot is pxor -1.
+ *total = ix86_vec_cost (mode, cost->sse_op) + 1;
+ }
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
*total = cost->add * 2;
else
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17616,6 +17616,98 @@
operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
})
+(define_insn "*iornot<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v,v,v,v")
+ (ior:VI
+ (not:VI
+ (match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m"))
+ (match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
+ && (register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+{
+ if (!register_operand (operands[1], <MODE>mode))
+ {
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{$0xdd, %1, %2, %0|%0, %2, %1, 0xdd}";
+ return "vpternlog<ternlogsuffix>\t{$0xdd, %g1, %g2, %g0|%g0, %g2, %g1, 0xdd}";
+ }
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{$0xbb, %2, %1, %0|%0, %1, %2, 0xbb}";
+ return "vpternlog<ternlogsuffix>\t{$0xbb, %g2, %g1, %g0|%g0, %g1, %g2, 0xbb}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "2,3")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
+
+(define_insn "*xnor<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v,v")
+ (not:VI
+ (xor:VI
+ (match_operand:VI 1 "bcst_vector_operand" "%v,v")
+ (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
+ && (register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+{
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{$0x99, %2, %1, %0|%0, %1, %2, 0x99}";
+ else
+ return "vpternlog<ternlogsuffix>\t{$0x99, %g2, %g1, %g0|%g0, %g1, %g2, 0x99}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
+
+(define_code_iterator andor [and ior])
+(define_code_attr nlogic [(and "nor") (ior "nand")])
+(define_code_attr ternlog_nlogic [(and "0x11") (ior "0x77")])
+
+(define_insn "*<nlogic><mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v,v")
+ (andor:VI
+ (not:VI (match_operand:VI 1 "bcst_vector_operand" "%v,v"))
+ (not:VI (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
+ && (register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+{
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %2, %1, %0|%0, %1, %2, <ternlog_nlogic>}";
+ else
+ return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %g2, %g1, %g0|%g0, %g1, %g2, <ternlog_nlogic>}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
+
(define_mode_iterator AVX512ZEXTMASK
[(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y) x##y
+#define PASTER3(x,y,z) _mm##x##_##y##_##z
+#define OP(vec, op, suffix) PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, SCALAR *f)
+{
+ return OP (vec, op, suffix) (x, OP (vec, xor, suffix) (DUP (vec, suffix, *f),
+ DUP (vec, suffix, ~0)));
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y) x##y
+#define PASTER3(x,y,z) _mm##x##_##y##_##z
+#define OP(vec, op, suffix) PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, SCALAR *f)
+{
+ return OP (vec, op, suffix) (OP (vec, xor, suffix) (x, DUP (vec, suffix, ~0)),
+ DUP (vec, suffix, *f));
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
+
+#define type __m512i
+#define vec 512
+#define op or
+#define suffix epi32
+#define SCALAR int
+
+#include "avx512-binop-not-1.h"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
+
+#define type __m512i
+#define vec 512
+#define op or
+#define suffix epi32
+#define SCALAR int
+
+#include "avx512-binop-not-2.h"
next prev parent reply other threads:[~2023-06-21 6:25 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-21 6:24 [PATCH 0/5] x86: make better use of VPTERNLOG{D,Q} Jan Beulich
2023-06-21 6:25 ` Jan Beulich [this message]
2023-06-25 4:42 ` [PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations Hongtao Liu
2023-06-25 5:52 ` Jan Beulich
2023-06-25 7:13 ` Hongtao Liu
2023-06-25 7:23 ` Hongtao Liu
2023-06-25 7:30 ` Hongtao Liu
2023-06-25 13:35 ` Jan Beulich
2023-06-26 0:42 ` Hongtao Liu
2023-06-21 6:27 ` [PATCH 2/5] x86: use VPTERNLOG also for certain andnot forms Jan Beulich
2023-06-25 4:58 ` Hongtao Liu
2023-06-21 6:27 ` [PATCH 3/5] x86: allow memory operand for AVX2 splitter for PR target/100711 Jan Beulich
2023-06-25 4:58 ` Hongtao Liu
2023-06-21 6:27 ` [PATCH 4/5] x86: further PR target/100711-like splitting Jan Beulich
2023-06-25 5:06 ` Hongtao Liu
2023-06-25 6:16 ` Jan Beulich
2023-06-25 6:27 ` Hongtao Liu
2023-06-21 6:28 ` [PATCH 5/5] x86: yet more " Jan Beulich
2023-06-25 5:12 ` Hongtao Liu
2023-06-25 6:25 ` Jan Beulich
2023-06-25 6:35 ` Hongtao Liu
2023-06-25 6:41 ` Hongtao Liu
2023-11-06 11:10 ` Jan Beulich
2023-11-06 13:48 ` Hongtao Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=457ffad0-9ecd-3e19-f5ab-6153ce4b8bad@suse.com \
--to=jbeulich@suse.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongtao.liu@intel.com \
--cc=kirill.yukhin@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).