diff --git gcc/config/aarch64/aarch64-sve.md gcc/config/aarch64/aarch64-sve.md index 5cd591b..d9fbc79 100644 --- gcc/config/aarch64/aarch64-sve.md +++ gcc/config/aarch64/aarch64-sve.md @@ -2109,6 +2109,18 @@ } ) +;; Unpredicated IEEE floating-point MIN/MAX reduction (operand 2 is forced to an all-ones predicate). +(define_expand "reduc_<fmaxmin_uns>_scal_<mode>" + [(set (match_operand:<VEL> 0 "register_operand") + (unspec:<VEL> [(match_dup 2) + (match_operand:SVE_F 1 "register_operand")] + FMAXMINNMV))] + "TARGET_SVE" + { + operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + ;; Predicated floating-point MIN/MAX reduction. (define_insn "*reduc_<maxmin_uns>_scal_<mode>" [(set (match_operand:<VEL> 0 "register_operand" "=w") diff --git gcc/config/aarch64/iterators.md gcc/config/aarch64/iterators.md index 524e4e6..ccc9f9d 100644 --- gcc/config/aarch64/iterators.md +++ gcc/config/aarch64/iterators.md @@ -474,6 +474,8 @@ UNSPEC_COND_DIV ; Used in aarch64-sve.md. UNSPEC_COND_MAX ; Used in aarch64-sve.md. UNSPEC_COND_MIN ; Used in aarch64-sve.md. + UNSPEC_COND_FMAX ; Used in aarch64-sve.md. + UNSPEC_COND_FMIN ; Used in aarch64-sve.md. UNSPEC_COND_FMLA ; Used in aarch64-sve.md. UNSPEC_COND_FMLS ; Used in aarch64-sve.md. UNSPEC_COND_FNMLA ; Used in aarch64-sve.md. 
@@ -1458,6 +1460,8 @@ (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV UNSPEC_FMAXNMV UNSPEC_FMINNMV]) +(define_int_iterator FMAXMINNMV [UNSPEC_FMAXNMV UNSPEC_FMINNMV]) + (define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV]) (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) @@ -1569,7 +1573,8 @@ (define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB UNSPEC_COND_MUL UNSPEC_COND_DIV - UNSPEC_COND_MAX UNSPEC_COND_MIN]) + UNSPEC_COND_MAX UNSPEC_COND_MIN + UNSPEC_COND_FMAX UNSPEC_COND_FMIN]) (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA UNSPEC_COND_FMLS @@ -1616,7 +1621,9 @@ (UNSPEC_COND_FMLA "fma") (UNSPEC_COND_FMLS "fnma") (UNSPEC_COND_FNMLA "fnms") - (UNSPEC_COND_FNMLS "fms")]) + (UNSPEC_COND_FNMLS "fms") + (UNSPEC_COND_FMAX "fmax") + (UNSPEC_COND_FMIN "fmin")]) (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") @@ -1631,6 +1638,10 @@ (UNSPEC_FMAXNM "fmax") (UNSPEC_FMINNM "fmin")]) + +(define_int_attr fmaxmin_uns [(UNSPEC_FMAXNMV "fmax") + (UNSPEC_FMINNMV "fmin")]) + (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") (UNSPEC_SMAXV "smax") @@ -1832,14 +1843,18 @@ (UNSPEC_COND_MUL "fmul") (UNSPEC_COND_DIV "fdiv") (UNSPEC_COND_MAX "fmaxnm") - (UNSPEC_COND_MIN "fminnm")]) + (UNSPEC_COND_MIN "fminnm") + (UNSPEC_COND_FMAX "fmaxnm") + (UNSPEC_COND_FMIN "fminnm")]) (define_int_attr sve_fp_op_rev [(UNSPEC_COND_ADD "fadd") (UNSPEC_COND_SUB "fsubr") (UNSPEC_COND_MUL "fmul") (UNSPEC_COND_DIV "fdivr") (UNSPEC_COND_MAX "fmaxnm") - (UNSPEC_COND_MIN "fminnm")]) + (UNSPEC_COND_MIN "fminnm") + (UNSPEC_COND_FMAX "fmaxnm") + (UNSPEC_COND_FMIN "fminnm")]) (define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla") (UNSPEC_COND_FMLS "fmls") diff --git gcc/gimple-match.h gcc/gimple-match.h index b6eb888..fd657ac 100644 --- gcc/gimple-match.h +++ gcc/gimple-match.h @@ -327,6 +327,21 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op) && is_gimple_val 
(op->ops[0])); } +/* Return the code_helper for ORIG_STMT, which must be a gassign or a gcall. */ + +inline code_helper +code_helper_for_stmt (gimple *orig_stmt) +{ + code_helper code; + if (gassign *stmt = dyn_cast <gassign *> (orig_stmt)) + code = code_helper (gimple_assign_rhs_code (stmt)); + else if (gcall *stmt = dyn_cast <gcall *> (orig_stmt)) + code = code_helper (gimple_call_combined_fn (stmt)); + else + gcc_unreachable (); + return code; +} + extern tree (*mprts_hook) (gimple_match_op *); bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *, diff --git gcc/internal-fn.c gcc/internal-fn.c index d082dd5..629b689 100644 --- gcc/internal-fn.c +++ gcc/internal-fn.c @@ -3336,7 +3336,9 @@ conditional_internal_fn_code (internal_fn ifn) T (FMA) \ T (FMS) \ T (FNMA) \ - T (FNMS) + T (FNMS) \ + T (FMIN) \ + T (FMAX) /* Return a function that only performs internal function FN when a certain condition is met and that uses a given fallback value otherwise. diff --git gcc/internal-fn.def gcc/internal-fn.def index cda314e..8ea43bf 100644 --- gcc/internal-fn.def +++ gcc/internal-fn.def @@ -173,6 +173,9 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary) DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary) DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary) +DEF_INTERNAL_OPTAB_FN (COND_FMAX, ECF_CONST, cond_fmax, cond_binary) +DEF_INTERNAL_OPTAB_FN (COND_FMIN, ECF_CONST, cond_fmin, cond_binary) + DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary) DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW, @@ -187,6 +190,10 @@ DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW, reduc_ior_scal, unary) DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW, reduc_xor_scal, unary) +DEF_INTERNAL_OPTAB_FN (REDUC_FMAX, ECF_CONST | ECF_NOTHROW, + reduc_fmax_scal, unary) +DEF_INTERNAL_OPTAB_FN (REDUC_FMIN, ECF_CONST | ECF_NOTHROW, + reduc_fmin_scal, unary) /* Extract the last active element from a vector. 
*/ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, diff --git gcc/optabs.def gcc/optabs.def index 5a67f5e..8ed4de2 100644 --- gcc/optabs.def +++ gcc/optabs.def @@ -238,6 +238,8 @@ OPTAB_D (cond_fma_optab, "cond_fma$a") OPTAB_D (cond_fms_optab, "cond_fms$a") OPTAB_D (cond_fnma_optab, "cond_fnma$a") OPTAB_D (cond_fnms_optab, "cond_fnms$a") +OPTAB_D (cond_fmin_optab, "cond_fmin$a") +OPTAB_D (cond_fmax_optab, "cond_fmax$a") OPTAB_D (cmov_optab, "cmov$a6") OPTAB_D (cstore_optab, "cstore$a4") OPTAB_D (ctrap_optab, "ctrap$a4") @@ -315,6 +317,8 @@ OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a") OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a") OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a") OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a") +OPTAB_D (reduc_fmax_scal_optab, "reduc_fmax_scal_$a") +OPTAB_D (reduc_fmin_scal_optab, "reduc_fmin_scal_$a") OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a") OPTAB_D (extract_last_optab, "extract_last_$a") diff --git gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c new file mode 100644 index 0000000..d5ebe97 --- /dev/null +++ gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_REDUC_BI_MAXMIN(TYPE, NAME, FUNC) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_bi_##NAME##_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 13; \ + for (int i = 0; i < n; ++i) \ + r = __builtin_##FUNC (r, a[i]); \ + return r; \ +} + +#define TEST_BI_MAXMIN(T) \ + T (_Float16, max, fmaxf16) \ + T (float, max, fmaxf) \ + T (double, max, fmax) \ + \ + T (_Float16, min, fminf16) \ + T (float, min, fminf) \ + T (double, min, fmin) + +TEST_BI_MAXMIN (DEF_REDUC_BI_MAXMIN) + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* 
{ dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ diff --git gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c new file mode 100644 index 0000000..9147565 --- /dev/null +++ gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c @@ -0,0 +1,201 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_REDUC_PLUS(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 0; \ + for (int i = 0; i < n; ++i) \ + r += a[i]; \ + return r; \ +} + +#define TEST_PLUS(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_PLUS (DEF_REDUC_PLUS) + +#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 13; \ + for (int i = 0; i < n; ++i) \ + r = a[i] CMP_OP r ? 
a[i] : r; \ + return r; \ +} + +#define TEST_MAXMIN(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_MAXMIN (DEF_REDUC_MAXMIN) + +#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 13; \ + for (int i = 0; i < n; ++i) \ + r BIT_OP a[i]; \ + return r; \ +} + +#define TEST_BITWISE(T) \ + T (int8_t, and, &=) \ + T (int16_t, and, &=) \ + T (int32_t, and, &=) \ + T (int64_t, and, &=) \ + T (uint8_t, and, &=) \ + T (uint16_t, and, &=) \ + T (uint32_t, and, &=) \ + T (uint64_t, and, &=) \ + \ + T (int8_t, ior, |=) \ + T (int16_t, ior, |=) \ + T (int32_t, ior, |=) \ + T (int64_t, ior, |=) \ + T (uint8_t, ior, |=) \ + T (uint16_t, ior, |=) \ + T (uint32_t, ior, |=) \ + T (uint64_t, ior, |=) \ + \ + T (int8_t, xor, ^=) \ + T (int16_t, xor, ^=) \ + T (int32_t, xor, ^=) \ + T (int64_t, xor, ^=) \ + T (uint8_t, xor, ^=) \ + T (uint16_t, xor, ^=) \ + T (uint32_t, xor, ^=) \ + T (uint64_t, xor, ^=) + +TEST_BITWISE (DEF_REDUC_BITWISE) + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, 
p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 0 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, 
p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 0 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 0 } } */ + +/* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], 
z[0-9]+\.s\n} 0 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 0 } } */ + +/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ diff --git gcc/tree-vect-loop.c gcc/tree-vect-loop.c index 633c315..20f2f06 100644 --- gcc/tree-vect-loop.c +++ gcc/tree-vect-loop.c @@ -54,6 +54,9 @@ along with GCC; see the file COPYING3. If not see #include "tree-vector-builder.h" #include "vec-perm-indices.h" #include "tree-eh.h" +#include "case-cfn-macros.h" +#include "gimple-match.h" +#include "builtins.h" /* Loop Vectorization Pass. @@ -2322,7 +2325,7 @@ fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) /* Function reduction_fn_for_scalar_code Input: - CODE - tree_code of a reduction operations. + CODE - code_helper of a reduction operation. 
Output: REDUC_FN - the corresponding internal function to be used to reduce the @@ -2333,21 +2336,22 @@ fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) Return FALSE if CODE currently cannot be vectorized as reduction. */ static bool -reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) +reduction_fn_for_scalar_code (code_helper code, internal_fn * reduc_fn) { - switch (code) - { + if (code.is_tree_code ()) + switch ((enum tree_code) code) + { case MAX_EXPR: - *reduc_fn = IFN_REDUC_MAX; - return true; + *reduc_fn = IFN_REDUC_MAX; + return true; case MIN_EXPR: - *reduc_fn = IFN_REDUC_MIN; - return true; + *reduc_fn = IFN_REDUC_MIN; + return true; case PLUS_EXPR: - *reduc_fn = IFN_REDUC_PLUS; - return true; + *reduc_fn = IFN_REDUC_PLUS; + return true; case BIT_AND_EXPR: *reduc_fn = IFN_REDUC_AND; @@ -2363,12 +2367,28 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) case MULT_EXPR: case MINUS_EXPR: - *reduc_fn = IFN_LAST; - return true; + *reduc_fn = IFN_LAST; + return true; default: - return false; - } + return false; + } + else + switch ((combined_fn) code) + { + CASE_CFN_FMAX: + CASE_CFN_FMAX_FN: + *reduc_fn = IFN_REDUC_FMAX; + return true; + + CASE_CFN_FMIN: + CASE_CFN_FMIN_FN: + *reduc_fn = IFN_REDUC_FMIN; + return true; + + default: + return false; + } } /* If there is a neutral value X such that SLP reduction NODE would not @@ -2616,9 +2636,13 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi, overflow must wrap. */ static bool -needs_fold_left_reduction_p (tree type, tree_code code, +needs_fold_left_reduction_p (tree type, code_helper orig_code, bool need_wrapping_integral_overflow) { + if (orig_code.is_fn_code ()) + return false; + enum tree_code code = orig_code; + /* CHECKME: check for !flag_finite_math_only too? 
*/ if (SCALAR_FLOAT_TYPE_P (type)) switch (code) @@ -2653,7 +2677,7 @@ needs_fold_left_reduction_p (tree type, tree_code code, bool check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, - tree loop_arg, enum tree_code code) + tree loop_arg, code_helper code) { auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; auto_bitmap visited; @@ -2752,7 +2776,6 @@ pop: return ! fail && ! neg; } - /* Function vect_is_simple_reduction (1) Detect a cross-iteration def-use cycle that represents a simple @@ -2808,13 +2831,13 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); gimple *phi_use_stmt = NULL; - enum tree_code orig_code, code; + code_helper orig_code, code; tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; tree type; tree name; imm_use_iterator imm_iter; use_operand_p use_p; - bool phi_def; + bool phi_def, is_call; *double_reduc = false; *v_reduc_type = TREE_CODE_REDUCTION; @@ -2865,11 +2888,19 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, { name = gimple_assign_lhs (def_stmt); phi_def = false; + is_call = false; } else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) { name = PHI_RESULT (def_stmt); phi_def = true; + is_call = false; + } + else if (gcall *def_stmt = dyn_cast <gcall *> (def_stmt_info->stmt)) + { + name = gimple_call_lhs (def_stmt); + phi_def = false; + is_call = true; } else { @@ -2970,8 +3001,43 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, } } - gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt); - code = orig_code = gimple_assign_rhs_code (def_stmt); + if (is_call) + { + gcall *def_stmt = as_a <gcall *> (def_stmt_info->stmt); + + if (!gimple_call_builtin_p (def_stmt)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: unhandled reduction " + "with non-builtin call: %G", + def_stmt_info->stmt); + return NULL; + } + + code = orig_code 
= gimple_call_combined_fn (def_stmt); + + switch ((combined_fn) orig_code) + { + CASE_CFN_FMAX: + CASE_CFN_FMAX_FN: + CASE_CFN_FMIN: + CASE_CFN_FMIN_FN: + break; + default: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: unhandled reduction with builtin: %G", + def_stmt_info->stmt); + return NULL; + } + } + else + { + gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt); + code = orig_code = gimple_assign_rhs_code (def_stmt); + } + gimple *def_stmt = def_stmt_info->stmt; if (nested_in_vect_loop && !check_reduction) { @@ -3026,17 +3092,28 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, op1 = gimple_assign_rhs2 (def_stmt); op2 = gimple_assign_rhs3 (def_stmt); } - else if (!commutative_tree_code (code) || !associative_tree_code (code)) + else if (!is_call + && (!commutative_tree_code (code) + || !associative_tree_code (code))) { if (dump_enabled_p ()) report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, "reduction: not commutative/associative: "); return NULL; } - else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) + else if (is_call || get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) { - op1 = gimple_assign_rhs1 (def_stmt); - op2 = gimple_assign_rhs2 (def_stmt); + if (is_call) + { + gcc_assert (gimple_call_num_args (def_stmt) == 2); + op1 = gimple_call_arg (def_stmt, 0); + op2 = gimple_call_arg (def_stmt, 1); + } + else + { + op1 = gimple_assign_rhs1 (def_stmt); + op2 = gimple_assign_rhs2 (def_stmt); + } } else { @@ -3055,7 +3132,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } - type = TREE_TYPE (gimple_assign_lhs (def_stmt)); + type = TREE_TYPE (gimple_get_lhs (def_stmt)); if ((TREE_CODE (op1) == SSA_NAME && !types_compatible_p (type,TREE_TYPE (op1))) || (TREE_CODE (op2) == SSA_NAME @@ -3164,6 +3241,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } } + else if (is_call) + swap_ssa_operands 
(def_stmt, gimple_call_arg_ptr (def_stmt, 0), + gimple_call_arg_ptr (def_stmt, 1)); else swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), gimple_assign_rhs2_ptr (def_stmt)); @@ -3172,7 +3252,10 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, report_vect_op (MSG_NOTE, def_stmt, "detected reduction: need to swap operands: "); - if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) + if (!is_call && CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) + LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; + else if (is_call + && CONSTANT_CLASS_P (gimple_call_arg (def_stmt, 0))) LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; } else @@ -3745,6 +3828,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, int ncopies, stmt_vector_for_cost *cost_vec) { int prologue_cost = 0, epilogue_cost = 0, inside_cost; + code_helper orig_code; enum tree_code code; optab optab; tree vectype; @@ -3765,7 +3849,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, mode = TYPE_MODE (vectype); stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); - code = gimple_assign_rhs_code (orig_stmt_info->stmt); + orig_code = code_helper_for_stmt (orig_stmt_info->stmt); + /* Use MAX_EXPR tree_code for the call-based reductions. */ + code = orig_code.is_tree_code () ? 
(enum tree_code) orig_code : MAX_EXPR; if (reduction_type == EXTRACT_LAST_REDUCTION || reduction_type == FOLD_LEFT_REDUCTION) @@ -3861,7 +3947,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, { int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); tree bitsize = - TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt))); + TYPE_SIZE (TREE_TYPE (gimple_get_lhs (orig_stmt_info->stmt))); int element_bitsize = tree_to_uhwi (bitsize); int nelements = vec_size_in_bits / element_bitsize; @@ -3984,7 +4070,7 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree scalar_type = TREE_TYPE (init_val); tree vectype = get_vectype_for_scalar_type (scalar_type); - enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt); + code_helper code = code_helper_for_stmt (stmt_vinfo->stmt); tree def_for_init; tree init_def; REAL_VALUE_TYPE real_init_val = dconst0; @@ -4002,82 +4088,111 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, vect_reduction_type reduction_type = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo); - switch (code) - { - case WIDEN_SUM_EXPR: - case DOT_PROD_EXPR: - case SAD_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - case MULT_EXPR: - case BIT_AND_EXPR: + if (code.is_tree_code ()) + switch ((enum tree_code) code) { - /* ADJUSTMENT_DEF is NULL when called from - vect_create_epilog_for_reduction to vectorize double reduction. */ - if (adjustment_def) - *adjustment_def = init_val; - - if (code == MULT_EXPR) - { - real_init_val = dconst1; - int_init_val = 1; - } - - if (code == BIT_AND_EXPR) - int_init_val = -1; - - if (SCALAR_FLOAT_TYPE_P (scalar_type)) - def_for_init = build_real (scalar_type, real_init_val); - else - def_for_init = build_int_cst (scalar_type, int_init_val); - - if (adjustment_def) - /* Option1: the first element is '0' or '1' as well. 
*/ - init_def = gimple_build_vector_from_val (&stmts, vectype, - def_for_init); - else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) - { - /* Option2 (variable length): the first element is INIT_VAL. */ + case WIDEN_SUM_EXPR: + case DOT_PROD_EXPR: + case SAD_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case MULT_EXPR: + case BIT_AND_EXPR: + { + /* ADJUSTMENT_DEF is NULL when called from + vect_create_epilog_for_reduction to vectorize double reduction. */ + if (adjustment_def) + *adjustment_def = init_val; + + if (code == MULT_EXPR) + { + real_init_val = dconst1; + int_init_val = 1; + } + + if (code == BIT_AND_EXPR) + int_init_val = -1; + + if (SCALAR_FLOAT_TYPE_P (scalar_type)) + def_for_init = build_real (scalar_type, real_init_val); + else + def_for_init = build_int_cst (scalar_type, int_init_val); + + if (adjustment_def) + /* Option1: the first element is '0' or '1' as well. */ init_def = gimple_build_vector_from_val (&stmts, vectype, def_for_init); - init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, - vectype, init_def, init_val); - } - else - { - /* Option2: the first element is INIT_VAL. */ - tree_vector_builder elts (vectype, 1, 2); - elts.quick_push (init_val); - elts.quick_push (def_for_init); - init_def = gimple_build_vector (&stmts, &elts); - } - } - break; + else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) + { + /* Option2 (variable length): the first element is INIT_VAL. */ + init_def = gimple_build_vector_from_val (&stmts, vectype, + def_for_init); + init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, + vectype, init_def, init_val); + } + else + { + /* Option2: the first element is INIT_VAL. 
*/ + tree_vector_builder elts (vectype, 1, 2); + elts.quick_push (init_val); + elts.quick_push (def_for_init); + init_def = gimple_build_vector (&stmts, &elts); + } + } + break; - case MIN_EXPR: - case MAX_EXPR: - case COND_EXPR: - { - if (adjustment_def) - { - *adjustment_def = NULL_TREE; - if (reduction_type != COND_REDUCTION - && reduction_type != EXTRACT_LAST_REDUCTION) - { - init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo); - break; - } - } - init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); - init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); + case MIN_EXPR: + case MAX_EXPR: + case COND_EXPR: + { + if (adjustment_def) + { + *adjustment_def = NULL_TREE; + /* COND_REDUCTION and EXTRACT_LAST_REDUCTION fall through and splat INIT_VAL below, as before. */ + if (reduction_type != COND_REDUCTION + && reduction_type != EXTRACT_LAST_REDUCTION) + { + init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo); + break; + } + } + init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); + init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); + } + break; + + default: + gcc_unreachable (); } - break; + else + switch ((combined_fn) code) + { + CASE_CFN_FMAX: + CASE_CFN_FMAX_FN: + CASE_CFN_FMIN: + CASE_CFN_FMIN_FN: + { + if (adjustment_def) + { + *adjustment_def = NULL_TREE; + if (reduction_type != COND_REDUCTION + && reduction_type != EXTRACT_LAST_REDUCTION) + { + init_def + = vect_get_vec_def_for_operand (init_val, stmt_vinfo); + break; + } + } + init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); + init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); + } + break; - default: - gcc_unreachable (); - } + default: + gcc_unreachable (); + } if (stmts) gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); @@ -4345,7 +4460,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, tree vec_dest; tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest; gimple *epilog_stmt = NULL; - enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt); + code_helper code = code_helper_for_stmt (stmt_info->stmt); gimple *exit_phi; tree bitsize; tree 
adjustment_def = NULL; @@ -4689,13 +4799,13 @@ vect_create_epilog_for_reduction (vec vect_defs, gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); } - code = gimple_assign_rhs_code (orig_stmt_info->stmt); + code = code_helper_for_stmt (orig_stmt_info->stmt); /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, partial results are added and not subtracted. */ if (code == MINUS_EXPR) code = PLUS_EXPR; - scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); + scalar_dest = gimple_get_lhs (orig_stmt_info->stmt); scalar_type = TREE_TYPE (scalar_dest); scalar_results.create (group_size); new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); @@ -5988,7 +6098,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, tree vectype_in = NULL_TREE; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - enum tree_code code, orig_code; + code_helper code, orig_code; internal_fn reduc_fn; machine_mode vec_mode; int op_type; @@ -6065,25 +6175,54 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, /* Leave the scalar phi in place. 
*/ return true; - gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt); - code = gimple_assign_rhs_code (reduc_stmt); - for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) + if (gassign *reduc_stmt = dyn_cast <gassign *> (reduc_stmt_info->stmt)) { - tree op = gimple_op (reduc_stmt, k); - if (op == phi_result) - continue; - if (k == 1 && code == COND_EXPR) - continue; - bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt); - gcc_assert (is_simple_use); - if (dt == vect_constant_def || dt == vect_external_def) - continue; - if (!vectype_in - || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) - < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) - vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); - break; + code = gimple_assign_rhs_code (reduc_stmt); + + for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) + { + tree op = gimple_op (reduc_stmt, k); + if (op == phi_result) + continue; + if (k == 1 && code == COND_EXPR) + continue; + bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt); + gcc_assert (is_simple_use); + if (dt == vect_constant_def || dt == vect_external_def) + continue; + if (!vectype_in + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) + vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); + break; + } } + else if (gcall *reduc_stmt = dyn_cast <gcall *> (reduc_stmt_info->stmt)) + { + code = code_helper_for_stmt (reduc_stmt); + /* Call arguments are numbered from zero, so scan from k = 0. */ + for (unsigned k = 0; k < gimple_call_num_args (reduc_stmt); ++k) + { + tree op = gimple_call_arg (reduc_stmt, k); + if (op == phi_result) + continue; + bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt); + gcc_assert (is_simple_use); + if (dt == vect_constant_def || dt == vect_external_def) + continue; + if (!vectype_in + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) + vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); + break; + } + + }
+ else + gcc_unreachable (); + + gimple *reduc_stmt = reduc_stmt_info->stmt; + /* For a nested cycle we might end up with an operation like phi_result * phi_result. */ if (!vectype_in) @@ -6103,7 +6242,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, single_defuse_cycle = true; /* Create the destination vector */ - scalar_dest = gimple_assign_lhs (reduc_stmt); + scalar_dest = gimple_get_lhs (reduc_stmt); vec_dest = vect_create_destination_var (scalar_dest, vectype_out); if (slp_node) @@ -6177,39 +6316,51 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, inside the loop body. The last operand is the reduction variable, which is defined by the loop-header-phi. */ - gassign *stmt = as_a <gassign *> (stmt_info->stmt); - /* Flatten RHS. */ - switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) - { - case GIMPLE_BINARY_RHS: - code = gimple_assign_rhs_code (stmt); - op_type = TREE_CODE_LENGTH (code); - gcc_assert (op_type == binary_op); - ops[0] = gimple_assign_rhs1 (stmt); - ops[1] = gimple_assign_rhs2 (stmt); - break; - - case GIMPLE_TERNARY_RHS: - code = gimple_assign_rhs_code (stmt); - op_type = TREE_CODE_LENGTH (code); - gcc_assert (op_type == ternary_op); - ops[0] = gimple_assign_rhs1 (stmt); - ops[1] = gimple_assign_rhs2 (stmt); - ops[2] = gimple_assign_rhs3 (stmt); - break; - - case GIMPLE_UNARY_RHS: - return false; + if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) + { + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { + case GIMPLE_BINARY_RHS: + code = gimple_assign_rhs_code (stmt); + op_type = TREE_CODE_LENGTH ((enum tree_code) code); + gcc_assert (op_type == binary_op); + ops[0] = gimple_assign_rhs1 (stmt); + ops[1] = gimple_assign_rhs2 (stmt); + break; - default: - gcc_unreachable (); + case GIMPLE_TERNARY_RHS: + code = gimple_assign_rhs_code (stmt); + op_type = TREE_CODE_LENGTH ((enum tree_code) code); + gcc_assert (op_type == ternary_op); + ops[0] = gimple_assign_rhs1 (stmt); + ops[1]
= gimple_assign_rhs2 (stmt); + ops[2] = gimple_assign_rhs3 (stmt); + break; + + case GIMPLE_UNARY_RHS: + return false; + + default: + gcc_unreachable (); + } } + else if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) + { + code = code_helper_for_stmt (stmt); + gcc_assert (gimple_call_num_args (stmt) == 2); + op_type = binary_op; + ops[0] = gimple_call_arg (stmt, 0); + ops[1] = gimple_call_arg (stmt, 1); + } + else + gcc_unreachable (); if (code == COND_EXPR && slp_node) return false; - scalar_dest = gimple_assign_lhs (stmt); + gimple *stmt = stmt_info->stmt; + scalar_dest = gimple_get_lhs (stmt); scalar_type = TREE_TYPE (scalar_dest); if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) && !SCALAR_FLOAT_TYPE_P (scalar_type)) @@ -6533,7 +6684,12 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, /* 4. Supportable by target? */ /* 4.1. check support for the operation in the loop */ - optab = optab_for_tree_code (code, vectype_in, optab_default); + if (code.is_tree_code ()) + optab = optab_for_tree_code (code, vectype_in, optab_default); + else + /* Use MAX_EXPR tree_code for the call-based reductions. */ + optab = optab_for_tree_code (MAX_EXPR, vectype_in, optab_default); + if (!optab) { if (dump_enabled_p ()) @@ -6897,7 +7053,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, else vec_num = 1; - internal_fn cond_fn = get_conditional_internal_fn (code); + internal_fn cond_fn; + if (code.is_fn_code ()) + { + gcall *orig_call = as_a <gcall *> (stmt_info->stmt); + internal_fn ifn = replacement_internal_fn (orig_call); + cond_fn = get_conditional_internal_fn (ifn); + } + else + cond_fn = get_conditional_internal_fn ((enum tree_code) code); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); if (!vec_stmt) /* transformation not required. */ @@ -7074,7 +7238,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, /* Make sure that the reduction accumulator is vop[0].
*/ if (reduc_index == 1) { - gcc_assert (commutative_tree_code (code)); + gcc_assert (code.is_fn_code () + || commutative_tree_code (code)); std::swap (vop[0], vop[1]); } tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, @@ -7088,6 +7253,18 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi); } + else if (code.is_fn_code ()) + { + gcall *orig_call = as_a <gcall *> (stmt_info->stmt); + internal_fn ifn = replacement_internal_fn (orig_call); + gcall *call + = gimple_build_call_internal (ifn, 2, vop[0], vop[1]); + new_temp = make_ssa_name (vec_dest, call); + gimple_call_set_lhs (call, new_temp); + gimple_call_set_nothrow (call, true); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, call, gsi); + } else { if (op_type == ternary_op) diff --git gcc/tree-vectorizer.h gcc/tree-vectorizer.h index f1c186b..578105dc 100644 --- gcc/tree-vectorizer.h +++ gcc/tree-vectorizer.h @@ -26,6 +26,7 @@ typedef struct _stmt_vec_info *stmt_vec_info; #include "tree-data-ref.h" #include "tree-hash-traits.h" #include "target.h" +#include "gimple-match.h" /* Used for naming of new temporaries. */ enum vect_var_kind { @@ -1556,7 +1557,7 @@ extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, bool *, bool); /* Used in gimple-loop-interchange.c. */ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, - enum tree_code); + code_helper); /* Drive for loop analysis stage. */ extern opt_loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info,