From: Uros Bizjak <ubizjak@gmail.com>
To: liuhongt <hongtao.liu@intel.com>
Cc: gcc-patches@gcc.gnu.org, crazylht@gmail.com, hjl.tools@gmail.com
Subject: Re: [PATCH] [x86] Add define_insn_and_split to support general version of "kxnor".
Date: Tue, 11 Oct 2022 11:04:29 +0200 [thread overview]
Message-ID: <CAFULd4bbB-cg5ttarrbCg5yj3pzSkP+3RTBVzk-6ENEZT6vmFA@mail.gmail.com> (raw)
In-Reply-To: <20221011080316.1778261-1-hongtao.liu@intel.com>
On Tue, Oct 11, 2022 at 10:03 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> For general_reg_operand, it will be split into xor + not.
> For mask_reg_operand, it will be split with UNSPEC_MASK_OP just
> like what we did for other logic operations.
>
> The patch will optimize xor+not to kxnor when possible.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*notxor<mode>_1): New post_reload
> define_insn_and_split.
> (*notxorqi_1): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr107093.c: New test.
OK with a small fix below.
Thanks,
Uros.
> ---
> gcc/config/i386/i386.md | 71 ++++++++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr107093.c | 38 +++++++++++++
> 2 files changed, 109 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107093.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 1be9b669909..228edba2b40 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -10826,6 +10826,39 @@ (define_insn "*<code><mode>_1"
> (set_attr "type" "alu, alu, msklog")
> (set_attr "mode" "<MODE>")])
>
> +(define_insn_and_split "*notxor<mode>_1"
> + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
> + (not:SWI248
> + (xor:SWI248
> + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
> + (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
> + (clobber (reg:CC FLAGS_REG))]
> + "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
> + "#"
> + "&& reload_completed"
> + [(parallel
> + [(set (match_dup 0)
> + (xor:SWI248 (match_dup 1) (match_dup 2)))
> + (clobber (reg:CC FLAGS_REG))])
> + (set (match_dup 0)
> + (not:SWI248 (match_dup 1)))]
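This should be
(not:SWI248 (match_dup 0))
in the above RTX, so that the NOT is applied to the XOR result stored in
operand 0 (as is already done in the *notxorqi_1 pattern below).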
> +{
> + if (MASK_REGNO_P (REGNO (operands[0])))
> + {
> + emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
> + DONE;
> + }
> +}
> + [(set (attr "isa")
> + (cond [(eq_attr "alternative" "2")
> + (if_then_else (eq_attr "mode" "SI,DI")
> + (const_string "avx512bw")
> + (const_string "avx512f"))
> + ]
> + (const_string "*")))
> + (set_attr "type" "alu, alu, msklog")
> + (set_attr "mode" "<MODE>")])
> +
> (define_insn_and_split "*iordi_1_bts"
> [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
> (ior:DI
> @@ -10959,6 +10992,44 @@ (define_insn "*<code>qi_1"
> (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
> (symbol_ref "true")))])
>
> +(define_insn_and_split "*notxorqi_1"
> + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
> + (not:QI
> + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
> + (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
> + (clobber (reg:CC FLAGS_REG))]
> + "ix86_binary_operator_ok (XOR, QImode, operands)"
> + "#"
> + "&& reload_completed"
> + [(parallel
> + [(set (match_dup 0)
> + (xor:QI (match_dup 1) (match_dup 2)))
> + (clobber (reg:CC FLAGS_REG))])
> + (set (match_dup 0)
> + (not:QI (match_dup 0)))]
> +{
> + if (mask_reg_operand (operands[0], QImode))
> + {
> + emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
> + DONE;
> + }
> +}
> + [(set_attr "isa" "*,*,*,avx512f")
> + (set_attr "type" "alu,alu,alu,msklog")
> + (set (attr "mode")
> + (cond [(eq_attr "alternative" "2")
> + (const_string "SI")
> + (and (eq_attr "alternative" "3")
> + (match_test "!TARGET_AVX512DQ"))
> + (const_string "HI")
> + ]
> + (const_string "QI")))
> + ;; Potential partial reg stall on alternative 2.
> + (set (attr "preferred_for_speed")
> + (cond [(eq_attr "alternative" "2")
> + (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
> + (symbol_ref "true")))])
> +
> ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
> (define_insn_and_split "*<code><mode>_1_slp"
> [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
> diff --git a/gcc/testsuite/gcc.target/i386/pr107093.c b/gcc/testsuite/gcc.target/i386/pr107093.c
> new file mode 100644
> index 00000000000..23e30cbac0f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107093.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512bw -O2 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times {(?n)kxnor[bwqd]} 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times {(?n)kxnor[bwdq]} 3 { target ia32 } } } */
> +
> +#include<immintrin.h>
> +
> +__m512i
> +foo (__m512i a, __m512i b, __m512i c, __m512i d)
> +{
> + __mmask32 k1 = _mm512_cmp_epi16_mask (a, b, 1);
> + __mmask32 k2 = _mm512_cmp_epi16_mask (c, d, 2);
> + return _mm512_mask_mov_epi16 (a, ~(k1 ^ k2), c);
> +}
> +
> +__m512i
> +foo1 (__m512i a, __m512i b, __m512i c, __m512i d)
> +{
> + __mmask16 k1 = _mm512_cmp_epi32_mask (a, b, 1);
> + __mmask16 k2 = _mm512_cmp_epi32_mask (c, d, 2);
> + return _mm512_mask_mov_epi32 (a, ~(k1 ^ k2), c);
> +}
> +
> +__m512i
> +foo2 (__m512i a, __m512i b, __m512i c, __m512i d)
> +{
> + __mmask64 k1 = _mm512_cmp_epi8_mask (a, b, 1);
> + __mmask64 k2 = _mm512_cmp_epi8_mask (c, d, 2);
> + return _mm512_mask_mov_epi8 (a, ~(k1 ^ k2), c);
> +}
> +
> +__m512i
> +foo3 (__m512i a, __m512i b, __m512i c, __m512i d)
> +{
> + __mmask8 k1 = _mm512_cmp_epi64_mask (a, b, 1);
> + __mmask8 k2 = _mm512_cmp_epi64_mask (c, d, 2);
> + return _mm512_mask_mov_epi64 (a, ~(k1 ^ k2), c);
> +}
> --
> 2.27.0
>
next prev parent reply other threads:[~2022-10-11 9:04 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-11 8:03 liuhongt
2022-10-11 9:04 ` Uros Bizjak [this message]
2022-10-11 13:58 ` Jakub Jelinek
2022-10-12 0:56 ` Liu, Hongtao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAFULd4bbB-cg5ttarrbCg5yj3pzSkP+3RTBVzk-6ENEZT6vmFA@mail.gmail.com \
--to=ubizjak@gmail.com \
--cc=crazylht@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hjl.tools@gmail.com \
--cc=hongtao.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).