Hi All,

This adds an implementation of the conditional branch optab for AArch64.

For 128-bit vectors we generate:

	cmhi	v1.4s, v1.4s, v0.4s
	umaxp	v1.4s, v1.4s, v1.4s
	fmov	x3, d1
	cbnz	x3, .L8

and for 64-bit vectors we can omit the compression:

	cmhi	v1.2s, v1.2s, v0.2s
	fmov	x2, d1
	cbz	x2, .L13

I also wanted to provide a version that mixes SVE and NEON so I can use the
SVE compare instructions on a NEON register.  Concretely, for a 128-bit vector
you'd get:

	ptrue	p0.s, vl4
.L3:
	...
	cmplo	p2.s, p0/z, z0.s, z2.s
	b.any	.L6
	...
	cmp	w2, 200
	bne	.L3

However I ran into an issue: cbranch is not the pattern that performs the
comparison, and if I rely on combine to form it then the resulting ptrue does
not get floated outside the loop.  Is there currently a way to do this, or
does a mid-end pass need to be changed for it?

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (cbranch<mode>4): New.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp (check_effective_target_vect_early_break):
	Enable AArch64 generically.

--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5386043739a9b2e328bfb2fc9067da8feeac1a92..e53d339ea20492812a3faa7c20ed945255321b11 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3795,6 +3795,41 @@ (define_expand "vcond_mask_<mode><v_int_equiv>"
   DONE;
 })
 
+;; Patterns comparing two vectors to produce a set of flags.
+
+(define_expand "cbranch<mode>4"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 0 "aarch64_equality_operator"
+	    [(match_operand:VDQ_BHSI 1 "register_operand")
+	     (match_operand:VDQ_BHSI 2 "aarch64_simd_reg_or_zero")])
+	  (label_ref (match_operand 3 ""))
+	  (pc)))]
+  "TARGET_SIMD"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  /* For 64-bit vectors we need no reductions.  */
+  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+    {
+      /* Always reduce using a V4SI.  */
+      rtx reduc = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0);
+      rtx res = gen_reg_rtx (V4SImode);
+      emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+      emit_move_insn (tmp, simplify_gen_subreg (<MODE>mode, res, V4SImode, 0));
+    }
+  else
+    tmp = operands[1];
+
+  rtx val = gen_reg_rtx (DImode);
+  emit_move_insn (val, simplify_gen_subreg (DImode, tmp, <MODE>mode, 0));
+
+  rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx);
+  rtx cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+  emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
+  DONE;
+})
+
 ;; Patterns comparing two vectors to produce a mask.
 
 (define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5cbf54bd2a23dfdc5dc7b148b0dc6ed4c63814ae..8964cbd6610a718711546d312e89cee937d210e8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3653,8 +3653,7 @@ proc check_effective_target_vect_int { } {
 proc check_effective_target_vect_early_break { } {
     return [check_cached_effective_target_indexed vect_early_break {
       expr {
-	([istarget aarch64*-*-*]
-	 && [check_effective_target_aarch64_sve])
+	[istarget aarch64*-*-*]
 	}}]
 }
 # Return 1 if the target supports hardware vectorization of complex additions of
--
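
P.S. for context, a minimal sketch of the kind of early-break loop this is
aimed at (illustrative only; the function name and loop are mine and not
taken from the patch or the testsuite).  When the early break vectorizer
handles it, the a[i] > b[i] exit test becomes a vector compare whose result
feeds cbranch<mode>4 and should expand to roughly the cmhi/umaxp/fmov/cbnz
sequence quoted above:

int
find_first_gt (unsigned int *a, unsigned int *b, int n)
{
  /* Early exit: once vectorized, the exit condition is a vector compare
     that the new expander reduces to a scalar and branches on.  */
  for (int i = 0; i < n; i++)
    if (a[i] > b[i])
      return i;
  return -1;
}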