From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 5FE0A3858C60 for ; Fri, 15 Oct 2021 14:55:39 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 5FE0A3858C60 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 0C22A2F; Fri, 15 Oct 2021 07:55:39 -0700 (PDT) Received: from localhost (unknown [10.32.98.88]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 8A58D3F66F; Fri, 15 Oct 2021 07:55:38 -0700 (PDT) From: Richard Sandiford To: Christophe Lyon via Gcc-patches Mail-Followup-To: Christophe Lyon via Gcc-patches , Christophe Lyon , richard.sandiford@arm.com Subject: Re: [PATCH v2 09/14] arm: Fix vcond_mask expander for MVE (PR target/100757) References: <20211013101554.2732342-1-christophe.lyon@foss.st.com> <20211013101554.2732342-10-christophe.lyon@foss.st.com> Date: Fri, 15 Oct 2021 15:55:37 +0100 In-Reply-To: <20211013101554.2732342-10-christophe.lyon@foss.st.com> (Christophe Lyon via Gcc-patches's message of "Wed, 13 Oct 2021 12:15:29 +0200") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-Spam-Status: No, score=-12.4 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 15 Oct 2021 14:55:42 -0000 Christophe Lyon via Gcc-patches writes: > @@ -31086,36 +31087,20 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, > case NE: > if (TARGET_HAVE_MVE) > { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = 
target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, op0, force_reg (cmp_mode, op1))); Pre-existing nit: long line. Same for later calls in the same function. Richard > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target, op0, force_reg (cmp_mode, op1))); > else > gcc_unreachable (); > break; > default: > gcc_unreachable (); > } > - > - /* If we are not expanding a vcond, build the result here. */ > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); > - } > } > else > emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); > @@ -31127,23 +31112,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, > case GEU: > case GTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, op0, force_reg (cmp_mode, op1))); > else > emit_insn 
(gen_neon_vc (code, cmp_mode, target, > op0, force_reg (cmp_mode, op1))); > @@ -31154,23 +31123,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, > case LEU: > case LTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0)); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target, force_reg (cmp_mode, op1), op0)); > else > emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, > target, force_reg (cmp_mode, op1), op0)); > @@ -31185,8 +31138,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, > rtx gt_res = gen_reg_rtx (cmp_result_mode); > rtx alt_res = gen_reg_rtx (cmp_result_mode); > rtx_code alt_code = (code == LTGT ? LT : LE); > - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve) > - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve)) > + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) > + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) > gcc_unreachable (); > emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, > gt_res, alt_res))); > @@ -31206,19 +31159,15 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) > { > /* When expanding for MVE, we do not want to emit a (useless) vpsel in > arm_expand_vector_compare, and another one here. 
*/ > - bool vcond_mve=false; > rtx mask; > > if (TARGET_HAVE_MVE) > - { > - vcond_mve=true; > - mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode)); > - } > + mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ()); > else > mask = gen_reg_rtx (cmp_result_mode); > > bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), > - operands[4], operands[5], true, vcond_mve); > + operands[4], operands[5], true); > if (inverted) > std::swap (operands[1], operands[2]); > if (TARGET_NEON) > @@ -31226,20 +31175,20 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) > mask, operands[1], operands[2])); > else > { > - machine_mode cmp_mode = GET_MODE (operands[4]); > - rtx vpr_p0 = mask; > - rtx zero = gen_reg_rtx (cmp_mode); > - rtx one = gen_reg_rtx (cmp_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_mode)); > + machine_mode cmp_mode = GET_MODE (operands[0]); > + > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0)); > + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0], > + operands[1], operands[2], mask)); > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0)); > + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], > + operands[1], operands[2], mask)); > + else > + gcc_unreachable (); > break; > default: > gcc_unreachable (); > @@ -34149,4 +34098,15 @@ arm_mode_base_reg_class (machine_mode mode) > > struct gcc_target targetm = TARGET_INITIALIZER; > > +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. 
*/ > + > +opt_machine_mode > +arm_get_mask_mode (machine_mode mode) > +{ > + if (TARGET_HAVE_MVE) > + return arm_mode_to_pred_mode (mode); > + > + return default_get_mask_mode (mode); > +} > + > #include "gt-arm.h" > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 9da78657798..fb25cac1cfd 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -10540,3 +10540,57 @@ (define_insn "*mve_mov" > vmsr%?\t P0, %1 > vmrs%?\t %0, P0" > ) > + > +;; Expanders for vec_cmp and vcond > + > +(define_expand "vec_cmp" > + [(set (match_operand: 0 "s_register_operand") > + (match_operator: 1 "comparison_operator" > + [(match_operand:MVE_VLD_ST 2 "s_register_operand") > + (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE > + && (! || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu" > + [(set (match_operand: 0 "s_register_operand") > + (match_operator: 1 "comparison_operator" > + [(match_operand:MVE_2 2 "s_register_operand") > + (match_operand:MVE_2 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_" > + [(set (match_operand:MVE_VLD_ST 0 "s_register_operand") > + (if_then_else:MVE_VLD_ST > + (match_operand: 3 "s_register_operand") > + (match_operand:MVE_VLD_ST 1 "s_register_operand") > + (match_operand:MVE_VLD_ST 2 "s_register_operand")))] > + "TARGET_HAVE_MVE" > +{ > + switch (GET_MODE_CLASS (mode)) > + { > + case MODE_VECTOR_INT: > + emit_insn (gen_mve_vpselq (VPSELQ_S, mode, operands[0], > + operands[1], operands[2], operands[3])); > + break; > + case MODE_VECTOR_FLOAT: > + if (TARGET_HAVE_MVE_FLOAT) > + emit_insn (gen_mve_vpselq_f (mode, operands[0], > + operands[1], operands[2], operands[3])); > + else > + 
gcc_unreachable (); > + break; > + default: > + gcc_unreachable (); > + } > + DONE; > +}) > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 8b0a396947c..28310d93a4e 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1394,6 +1394,45 @@ (define_insn "*us_sub_neon" > [(set_attr "type" "neon_qsub")] > ) > > +(define_expand "vec_cmp" > + [(set (match_operand: 0 "s_register_operand") > + (match_operator: 1 "comparison_operator" > + [(match_operand:VDQWH 2 "s_register_operand") > + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > + "TARGET_NEON > + && (! || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu" > + [(set (match_operand:VDQIW 0 "s_register_operand") > + (match_operator:VDQIW 1 "comparison_operator" > + [(match_operand:VDQIW 2 "s_register_operand") > + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > + "TARGET_NEON" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_" > + [(set (match_operand:VDQWH 0 "s_register_operand") > + (if_then_else:VDQWH > + (match_operand: 3 "s_register_operand") > + (match_operand:VDQWH 1 "s_register_operand") > + (match_operand:VDQWH 2 "s_register_operand")))] > + "TARGET_NEON > + && (! || flag_unsafe_math_optimizations)" > +{ > + emit_insn (gen_neon_vbsl (operands[0], operands[3], operands[1], > + operands[2])); > + DONE; > +}) > + > ;; Patterns for builtins. > > ; good for plain vadd, vaddq. 
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index 68de4f0f943..9b461a76155 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -363,33 +363,6 @@ (define_expand "vlshr3" > } > }) > > -(define_expand "vec_cmp" > - [(set (match_operand: 0 "s_register_operand") > - (match_operator: 1 "comparison_operator" > - [(match_operand:VDQWH 2 "s_register_operand") > - (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > - "ARM_HAVE__ARITH > - && !TARGET_REALLY_IWMMXT > - && (! || flag_unsafe_math_optimizations)" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > -(define_expand "vec_cmpu" > - [(set (match_operand:VDQIW 0 "s_register_operand") > - (match_operator:VDQIW 1 "comparison_operator" > - [(match_operand:VDQIW 2 "s_register_operand") > - (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > - "ARM_HAVE__ARITH > - && !TARGET_REALLY_IWMMXT" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > ;; Conditional instructions. These are comparisons with conditional moves for > ;; vectors. They perform the assignment: > ;; > @@ -461,31 +434,6 @@ (define_expand "vcondu" > DONE; > }) > > -(define_expand "vcond_mask_" > - [(set (match_operand:VDQWH 0 "s_register_operand") > - (if_then_else:VDQWH > - (match_operand: 3 "s_register_operand") > - (match_operand:VDQWH 1 "s_register_operand") > - (match_operand:VDQWH 2 "s_register_operand")))] > - "ARM_HAVE__ARITH > - && !TARGET_REALLY_IWMMXT > - && (! 
|| flag_unsafe_math_optimizations)" > -{ > - if (TARGET_NEON) > - { > - emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3], > - operands[1], operands[2])); > - } > - else if (TARGET_HAVE_MVE) > - { > - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], > - operands[1], operands[2], operands[3])); > - } > - else > - gcc_unreachable (); > - DONE; > -}) > - > (define_expand "vec_load_lanesoi" > [(set (match_operand:OI 0 "s_register_operand") > (unspec:OI [(match_operand:OI 1 "neon_struct_operand")