diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 90118c6348e9614bef580d1dc94c0c1841dd5204..cd5ec35c3f53028f14828bd70a92924f62524c15 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3830,6 +3830,46 @@ (define_expand "vcond_mask_" DONE; }) +;; Patterns comparing two vectors and conditionally jump + +(define_expand "cbranch4" + [(set (pc) + (if_then_else + (match_operator 0 "aarch64_equality_operator" + [(match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 3 "")) + (pc)))] + "TARGET_SIMD" +{ + auto code = GET_CODE (operands[0]); + rtx tmp = operands[1]; + + /* If comparing against a non-zero vector we have to do a comparison first + so we can have a != 0 comparison with the result. */ + if (operands[2] != CONST0_RTX (mode)) + emit_insn (gen_vec_cmp (tmp, operands[0], operands[1], + operands[2])); + + /* For 64-bit vectors we need no reductions. */ + if (known_eq (128, GET_MODE_BITSIZE (mode))) + { + /* Always reduce using a V4SI. */ + rtx reduc = gen_lowpart (V4SImode, tmp); + rtx res = gen_reg_rtx (V4SImode); + emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); + emit_move_insn (tmp, gen_lowpart (mode, res)); + } + + rtx val = gen_reg_rtx (DImode); + emit_move_insn (val, gen_lowpart (DImode, tmp)); + + rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx); + rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx); + emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3])); + DONE; +}) + ;; Patterns comparing two vectors to produce a mask. (define_expand "vec_cmp" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c new file mode 100644 index 0000000000000000000000000000000000000000..c0363c3787270507d7902bb2ac0e39faef63a852 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c @@ -0,0 +1,124 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#define N 640 +int a[N] = {0}; +int b[N] = {0}; + + +/* +** f1: +** ... +** cmgt v[0-9]+.4s, v[0-9]+.4s, #0 +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f1 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] > 0) + break; + } +} + +/* +** f2: +** ... +** cmge v[0-9]+.4s, v[0-9]+.4s, #0 +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f2 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] >= 0) + break; + } +} + +/* +** f3: +** ... +** cmeq v[0-9]+.4s, v[0-9]+.4s, #0 +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f3 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] == 0) + break; + } +} + +/* +** f4: +** ... +** cmtst v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f4 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] != 0) + break; + } +} + +/* +** f5: +** ... +** cmlt v[0-9]+.4s, v[0-9]+.4s, #0 +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f5 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] < 0) + break; + } +} + +/* +** f6: +** ... +** cmle v[0-9]+.4s, v[0-9]+.4s, #0 +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** fmov x[0-9]+, d[0-9]+ +** cbnz x[0-9]+, \.L[0-9]+ +** ... +*/ +void f6 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] <= 0) + break; + } +}