From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id A7CBB3858C20; Tue, 11 Oct 2022 09:23:06 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A7CBB3858C20 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1665480186; bh=HNTWomN6djxxlVW6zJ4qs1T6+YqyEjwBsH7ct3WE5DI=; h=From:To:Subject:Date:From; b=uFS2qU5oXOvhrOki2bJSfXAu7/qXs+0cwFYZoj4wTkmd0/TKnPNhBWgehX/Qvk+Bj bh4AkSjcVaFSREvjZiVtbrCHwRqSU7QeLCFcmwrt4ke19OZ5LVe4XYgkKt/PgzlUOh dWr2sRtwN2a4KDyhYoBCWNM6bE5vpxvftsg1F3QU= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-3218] Add define_insn_and_split to support general version of "kxnor". X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: c4d15dddf6b9eacb36f535807ad2ee364af46e04 X-Git-Newrev: 498ad738690b3c464f901d63dcd4d0f49a50dd00 Message-Id: <20221011092306.A7CBB3858C20@sourceware.org> Date: Tue, 11 Oct 2022 09:23:06 +0000 (GMT) List-Id: https://gcc.gnu.org/g:498ad738690b3c464f901d63dcd4d0f49a50dd00 commit r13-3218-g498ad738690b3c464f901d63dcd4d0f49a50dd00 Author: liuhongt Date: Mon Oct 10 11:31:48 2022 +0800 Add define_insn_and_split to support general version of "kxnor". For genereal_reg_operand, it will be splitted into xor + not. For mask_reg_operand, it will be splitted with UNSPEC_MASK_OP just like what we did for other logic operations. The patch will optimize xor+not to kxnor when possible. gcc/ChangeLog: PR target/107093 * config/i386/i386.md (*notxor_1): New post_reload define_insn_and_split. (*notxorqi_1): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr107093.c: New test. Diff: --- gcc/config/i386/i386.md | 71 ++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr107093.c | 38 +++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 947513701b9..9390dd5be88 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10826,6 +10826,39 @@ (set_attr "type" "alu, alu, msklog") (set_attr "mode" "")]) +(define_insn_and_split "*notxor_1" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + (not:SWI248 + (xor:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") + (match_operand:SWI248 2 "" "r,,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, mode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:SWI248 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:SWI248 (match_dup 0)))] +{ + if (MASK_REGNO_P (REGNO (operands[0]))) + { + emit_insn (gen_kxnor (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "2") + (if_then_else (eq_attr "mode" "SI,DI") + (const_string "avx512bw") + (const_string "avx512f")) + ] + (const_string "*"))) + (set_attr "type" "alu, alu, msklog") + (set_attr "mode" "")]) + (define_insn_and_split "*iordi_1_bts" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ior:DI @@ -10959,6 +10992,44 @@ (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) +(define_insn_and_split "*notxorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") + (not:QI + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:QI (match_dup 0)))] +{ + if (mask_reg_operand (operands[0], QImode)) + { + emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set_attr "isa" "*,*,*,avx512f") + (set_attr "type" "alu,alu,alu,msklog") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (eq_attr "alternative" "3") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) + ;; Alternative 1 is needed to work around LRA limitation, see PR82524. (define_insn_and_split "*_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) diff --git a/gcc/testsuite/gcc.target/i386/pr107093.c b/gcc/testsuite/gcc.target/i386/pr107093.c new file mode 100644 index 00000000000..23e30cbac0f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107093.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2 -mavx512vl" } */ +/* { dg-final { scan-assembler-times {(?n)kxnor[bwqd]} 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times {(?n)kxnor[bwdq]} 3 { target ia32 } } } */ + +#include + +__m512i +foo (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 k1 = _mm512_cmp_epi16_mask (a, b, 1); + __mmask32 k2 = _mm512_cmp_epi16_mask (c, d, 2); + return _mm512_mask_mov_epi16 (a, ~(k1 ^ k2), c); +} + +__m512i +foo1 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 k1 = _mm512_cmp_epi32_mask (a, b, 1); + __mmask16 k2 = _mm512_cmp_epi32_mask (c, d, 2); + return _mm512_mask_mov_epi32 (a, ~(k1 ^ k2), c); +} + +__m512i +foo2 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 k1 = _mm512_cmp_epi8_mask (a, b, 1); + __mmask64 k2 = _mm512_cmp_epi8_mask (c, d, 2); + return _mm512_mask_mov_epi8 (a, ~(k1 ^ k2), c); +} + +__m512i +foo3 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 k1 = _mm512_cmp_epi64_mask (a, b, 1); + __mmask8 k2 = _mm512_cmp_epi64_mask (c, d, 2); + return _mm512_mask_mov_epi64 (a, ~(k1 ^ k2), c); +}