diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c06dd4fd6f85e07f0d4a77992b2bc06f04a1935b..33799dc35a1b90dd60d7e487ec41c5d84fb215a5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3883,31 +3883,58 @@ (define_expand "cbranch<mode>4"
   "TARGET_SIMD"
 {
   auto code = GET_CODE (operands[0]);
-  rtx tmp = operands[1];
-
-  /* If comparing against a non-zero vector we have to do a comparison first
-     so we can have a != 0 comparison with the result.  */
-  if (operands[2] != CONST0_RTX (<MODE>mode))
-    emit_insn (gen_vec_cmp<mode><mode> (tmp, operands[0], operands[1],
-                                        operands[2]));
-
-  /* For 64-bit vectors we need no reductions.  */
-  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+  /* If SVE is available, let's borrow some instructions.  We will optimize
+     these further later in combine.  */
+  if (TARGET_SVE)
     {
-      /* Always reduce using a V4SI.  */
-      rtx reduc = gen_lowpart (V4SImode, tmp);
-      rtx res = gen_reg_rtx (V4SImode);
-      emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
-      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+      machine_mode full_mode = aarch64_full_sve_mode (<MODE>mode).require ();
+      rtx in1 = lowpart_subreg (full_mode, operands[1], <MODE>mode);
+      rtx in2 = lowpart_subreg (full_mode, operands[2], <MODE>mode);
+
+      machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
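+      /* Build a predicate constant whose first 16 byte lanes (one 128-bit
+         Advanced SIMD vector's worth) are true and whose remaining lanes
+         are false.  */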
+      rtx_vector_builder builder (VNx16BImode, 16, 2);
+      for (unsigned int i = 0; i < 16; ++i)
+        builder.quick_push (CONST1_RTX (BImode));
+      for (unsigned int i = 0; i < 16; ++i)
+        builder.quick_push (CONST0_RTX (BImode));
+      rtx ptrue = force_reg (VNx16BImode, builder.build ());
+      rtx cast_ptrue = gen_lowpart (pred_mode, ptrue);
+      rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+
+      rtx tmp = gen_reg_rtx (pred_mode);
+      aarch64_expand_sve_vec_cmp_int (tmp, reverse_condition (code), in1, in2);
+      emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, ptrue_flag,
+                                    tmp));
+      operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+      operands[2] = const0_rtx;
     }
+  else
+    {
+      rtx tmp = operands[1];
 
-  rtx val = gen_reg_rtx (DImode);
-  emit_move_insn (val, gen_lowpart (DImode, tmp));
+      /* If comparing against a non-zero vector we have to do a comparison first
+         so we can have a != 0 comparison with the result.  */
+      if (operands[2] != CONST0_RTX (<MODE>mode))
+        emit_insn (gen_vec_cmp<mode><mode> (tmp, operands[0], operands[1],
+                                            operands[2]));
 
-  rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
-  rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
-  emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
-  DONE;
+      /* For 64-bit vectors we need no reductions.  */
+      if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+        {
+          /* Always reduce using a V4SI.  */
+          rtx reduc = gen_lowpart (V4SImode, tmp);
+          rtx res = gen_reg_rtx (V4SImode);
+          emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+          emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+        }
+
+      rtx val = gen_reg_rtx (DImode);
+      emit_move_insn (val, gen_lowpart (DImode, tmp));
+
+      rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
+      rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
+      DONE;
+    }
 })
 
 ;; Avdanced SIMD lacks a vector != comparison, but this is a quite common
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 5a652d8536a0ef9461f40da7b22834e683e73ceb..d9cc5c7e5629691e7abba7a18e308d35082e027d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8123,6 +8123,105 @@ (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
 )
 
+;; Predicated integer comparisons over Advanced SIMD arguments in which only
+;; the flags result is interesting.
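+;; These match the sequence that the cbranch<mode>4 expansion in
+;; aarch64-simd.md produces: an SVE predicate compare of an Advanced SIMD
+;; comparison result against zero.  Combine uses them to fold the two
+;; compares into a single SVE compare of the recast original operands.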
+(define_insn "*aarch64_pred_cmpeq_neon_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:VNx4BI + [(match_operand:VNx4BI 6 "register_operand" "Upl") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (EQL:VNx4BI + (subreg:SVE_FULL_BHSI + (neg: + (eq: + (match_operand: 2 "register_operand" "w") + (match_operand: 3 "aarch64_simd_reg_or_zero" "w"))) 0) + (match_operand:SVE_FULL_BHSI 8 "aarch64_simd_imm_zero" "Dz"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx4BI 0 "=Upa"))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" +{ + operands[2] = lowpart_subreg (mode, operands[2], mode); + operands[3] = lowpart_subreg (mode, operands[3], mode); + if (EQ == ) + std::swap (operands[2], operands[3]); + + return "cmpeq\t%0., %1/z, %2., %3."; +} +) + +;; Same as the above but version for == and != +(define_insn "*aarch64_pred_cmpne_neon_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:VNx4BI + [(match_operand:VNx4BI 6 "register_operand" "Upl") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (EQL:VNx4BI + (subreg:SVE_FULL_BHSI + (plus: + (eq: + (match_operand: 2 "register_operand" "w") + (match_operand: 3 "aarch64_simd_reg_or_zero" "w")) + (match_operand: 9 "aarch64_simd_imm_minus_one" "i")) 0) + (match_operand:SVE_FULL_BHSI 8 "aarch64_simd_imm_zero" "Dz"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx4BI 0 "=Upa"))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" +{ + operands[2] = lowpart_subreg (mode, operands[2], mode); + operands[3] = lowpart_subreg (mode, operands[3], mode); + if (EQ == ) + std::swap (operands[2], operands[3]); + + return "cmpne\t%0., %1/z, %2., %3."; +} +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] While tests ;; ------------------------------------------------------------------------- @@ -8602,7 +8701,7 @@ (define_expand "cbranch4" ) ;; See "Description of UNSPEC_PTEST" above for details. -(define_insn "aarch64_ptest" +(define_insn "@aarch64_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") (match_operand 1) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 1ce0564076d8b0d39542f49dd51e5df01cc83c35..73309ca00ec0aa3cd76c85e04535bac44cb2f354 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -906,6 +906,7 @@ from the machine description file `md'. */\n\n"); printf ("#include \"tm-constrs.h\"\n"); printf ("#include \"ggc.h\"\n"); printf ("#include \"target.h\"\n\n"); + printf ("#include \"rtx-vector-builder.h\"\n\n"); /* Read the machine description. */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_1.c new file mode 100644 index 0000000000000000000000000000000000000000..c281cfccbe12f0ac8c01ede563dbe325237902c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_1.c @@ -0,0 +1,117 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define N 640 +int a[N] = {0}; +int b[N] = {0}; + + +/* +** f1: +** ... 
+
+/*
+** f1:
+** ...
+**	cmpgt	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] > 0)
+	break;
+    }
+}
+
+/*
+** f2:
+** ...
+**	cmpge	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] >= 0)
+	break;
+    }
+}
+
+/*
+** f3:
+** ...
+**	cmpeq	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] == 0)
+	break;
+    }
+}
+
+/*
+** f4:
+** ...
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] != 0)
+	break;
+    }
+}
+
+/*
+** f5:
+** ...
+**	cmplt	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] < 0)
+	break;
+    }
+}
+
+/*
+** f6:
+** ...
+**	cmple	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	ptest	p[0-9]+, p[0-9]+.b
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] <= 0)
+	break;
+    }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1ca3eafc5ae33393a7df9b5e40fa3420a79bfc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
@@ -0,0 +1,114 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param=aarch64-autovec-preference=1" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+** ...
+**	cmgt	v[0-9]+.4s, v[0-9]+.4s, #0
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] > 0)
+	break;
+    }
+}
+
+/*
+** f2:
+** ...
+**	cmge	v[0-9]+.4s, v[0-9]+.4s, #0
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] >= 0)
+	break;
+    }
+}
+
+/*
+** f3:
+** ...
+**	cmpeq	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, z[0-9]+.s
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] == 0)
+	break;
+    }
+}
+
+/*
+** f4:
+** ...
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, z[0-9]+.s
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] != 0)
+	break;
+    }
+}
+
+/*
+** f5:
+** ...
+**	cmlt	v[0-9]+.4s, v[0-9]+.4s, #0
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] < 0)
+	break;
+    }
+}
+
+/*
+** f6:
+** ...
+**	cmle	v[0-9]+.4s, v[0-9]+.4s, #0
+**	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**	b.any	\.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] <= 0)
+	break;
+    }
+}