From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id 9BA05385841B; Thu, 13 Jan 2022 19:49:10 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9BA05385841B MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-6562] i386: Introduce V2QImode vectorized shifts [PR103861] X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: b0e5163960eceab701a1a25dfa049e394fe5b3de X-Git-Newrev: 7a7d8c3f6167fd45658ddbfa32adcfd2acc98eb4 Message-Id: <20220113194910.9BA05385841B@sourceware.org> Date: Thu, 13 Jan 2022 19:49:10 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 13 Jan 2022 19:49:10 -0000 https://gcc.gnu.org/g:7a7d8c3f6167fd45658ddbfa32adcfd2acc98eb4 commit r12-6562-g7a7d8c3f6167fd45658ddbfa32adcfd2acc98eb4 Author: Uros Bizjak Date: Thu Jan 13 20:48:18 2022 +0100 i386: Introduce V2QImode vectorized shifts [PR103861] Add V2QImode shift operations and split them to synthesized double HI/LO QImode operations with integer registers. Also robustify arithmetic split patterns. 2022-01-13 Uroš Bizjak gcc/ChangeLog: PR target/103861 * config/i386/i386.md (*ashlqi_ext<mode>_2): New insn pattern. (*<insn>qi_ext<mode>_2): Ditto. * config/i386/mmx.md (<insn>v2qi3): New insn_and_split pattern. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/pr103861.c (shl,ashr,lshr): New tests. 
Diff: --- gcc/config/i386/i386.md | 81 ++++++++++++++++++++++++++++++++ gcc/config/i386/mmx.md | 44 +++++++++++++++-- gcc/testsuite/gcc.target/i386/pr103861.c | 7 +++ 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index bcaaa4993b1..c2acb1dbd90 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12413,6 +12413,54 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) +(define_insn "*ashlqi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (ashift:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%h0, %h0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%h0"; + else + return "sal{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") + (match_operand 2 "const1_operand")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand") + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + ;; See comment above `ashl<mode>3' about how this works. 
(define_expand "<insn><mode>3" @@ -13143,6 +13191,39 @@ (const_string "0") (const_string "*"))) (set_attr "mode" "<MODE>")]) + +(define_insn "*<insn>qi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (any_shiftrt:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "<shift>{b}\t%h0"; + else + return "<shift>{b}\t{%2, %h0|%h0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand") + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) ;; Rotate instructions diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3d99a5e851b..782da220f98 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1657,7 +1657,8 @@ (neg:V2QI (match_operand:V2QI 1 "general_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" [(parallel [(set (strict_low_part (match_dup 0)) (neg:QI (match_dup 1))) @@ -1683,7 +1684,8 @@ (neg:V2QI (match_operand:V2QI 1 "sse_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && TARGET_SSE2 && reload_completed" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (minus:V16QI (match_dup 0) (match_dup 1)))] @@ -1757,7 +1759,8 @@ (match_operand:V2QI 1 "general_reg_operand") (match_operand:V2QI 2 "general_reg_operand"))) (clobber (reg:CC 
FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" [(parallel [(set (strict_low_part (match_dup 0)) (plusminus:QI (match_dup 1) (match_dup 2))) @@ -1790,7 +1793,8 @@ (match_operand:V2QI 1 "sse_reg_operand") (match_operand:V2QI 2 "sse_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && TARGET_SSE2 && reload_completed" [(set (match_dup 0) (plusminus:V16QI (match_dup 1) (match_dup 2)))] { @@ -2387,6 +2391,38 @@ (const_string "0"))) (set_attr "mode" "TI")]) +(define_insn_and_split "<insn>v2qi3" + [(set (match_operand:V2QI 0 "register_operand" "=Q") + (any_shift:V2QI + (match_operand:V2QI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8)) + (subreg:HI + (any_shift:QI + (subreg:QI + (zero_extract:HI (match_dup 4) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (strict_low_part (match_dup 0)) + (any_shift:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[4] = lowpart_subreg (HImode, operands[1], V2QImode); + operands[3] = lowpart_subreg (HImode, operands[0], V2QImode); + operands[1] = lowpart_subreg (QImode, operands[1], V2QImode); + operands[0] = lowpart_subreg (QImode, operands[0], V2QImode); +} + [(set_attr "type" "multi") + (set_attr "mode" "QI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/testsuite/gcc.target/i386/pr103861.c b/gcc/testsuite/gcc.target/i386/pr103861.c index 158717645b6..064b617774b 100644 --- a/gcc/testsuite/gcc.target/i386/pr103861.c +++ 
b/gcc/testsuite/gcc.target/i386/pr103861.c @@ -3,6 +3,7 @@ /* { dg-options "-O2 -dp" } */ typedef char __v2qi __attribute__ ((__vector_size__ (2))); +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); __v2qi and (__v2qi a, __v2qi b) { return a & b; }; @@ -20,4 +21,10 @@ __v2qi minus (__v2qi a, __v2qi b) { return a - b; }; __v2qi neg (__v2qi a) { return -a; }; +__v2qi shl (__v2qi a, int b) { return a << b; }; + +__v2qi ashr (__v2qi a, int b) { return a >> b; }; + +__v2qu lshr (__v2qu a, int b) { return a >> b; }; + /* { dg-final { scan-assembler-not "insvhi" } } */