diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 92bda1a7e14a3c9ea63e151e4a49a818bf4d1bdb..adba9fe97a9b43729c5e86d244a2a23e76cac097 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6112,6 +6112,22 @@ instruction pattern. There is no need for the hook to handle these two implementation approaches itself. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST (enum @var{tree_code}, tree @var{vectype}, tree @var{treeop0}, tree @var{treeop1}, rtx *@var{output}, rtx @var{in0}, rtx @var{in1}) +This hook is used to test whether the target has a special method of +division of vectors of type @var{vectype} using the two operands @code{treeop0}, +and @code{treeop1} and producing a vector of type @var{vectype}. The division +will then not be decomposed by the vectorizer and kept as a div. + +When the hook is being used to test whether the target supports a special +divide, @var{in0}, @var{in1}, and @var{output} are all null. When the hook +is being used to emit a division, @var{in0} and @var{in1} are the source +vectors of type @var{vectype} and @var{output} is the destination vector of +type @var{vectype}. + +Return true if the operation is possible, emitting instructions for it +if rtxes are provided and updating @var{output}. +@end deftypefn + @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) This hook should return the decl of a function that implements the vectorized variant of the function with the @code{combined_fn} code diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 112462310b134705d860153294287cfd7d4af81d..d5a745a02acdf051ea1da1b04076d058c24ce093 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4164,6 +4164,8 @@ address; but often a machine-dependent strategy can generate better code. 
@hook TARGET_VECTORIZE_VEC_PERM_CONST +@hook TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST + @hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION @hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION diff --git a/gcc/explow.cc b/gcc/explow.cc index ddb4d6ae3600542f8d2bb5617cdd3933a9fae6c0..568e0eb1a158c696458ae678f5e346bf34ba0036 100644 --- a/gcc/explow.cc +++ b/gcc/explow.cc @@ -1037,7 +1037,7 @@ round_push (rtx size) TRUNC_DIV_EXPR. */ size = expand_binop (Pmode, add_optab, size, alignm1_rtx, NULL_RTX, 1, OPTAB_LIB_WIDEN); - size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx, + size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, size, align_rtx, NULL_RTX, 1); size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1); @@ -1203,7 +1203,7 @@ align_dynamic_address (rtx target, unsigned required_align) gen_int_mode (required_align / BITS_PER_UNIT - 1, Pmode), NULL_RTX, 1, OPTAB_LIB_WIDEN); - target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target, + target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, target, gen_int_mode (required_align / BITS_PER_UNIT, Pmode), NULL_RTX, 1); diff --git a/gcc/expmed.h b/gcc/expmed.h index 0b2538c4c6bd51dfdc772ef70bdf631c0bed8717..0db2986f11ff4a4b10b59501c6f33cb3595659b5 100644 --- a/gcc/expmed.h +++ b/gcc/expmed.h @@ -708,8 +708,9 @@ extern rtx expand_variable_shift (enum tree_code, machine_mode, extern rtx expand_shift (enum tree_code, machine_mode, rtx, poly_int64, rtx, int); #ifdef GCC_OPTABS_H -extern rtx expand_divmod (int, enum tree_code, machine_mode, rtx, rtx, - rtx, int, enum optab_methods = OPTAB_LIB_WIDEN); +extern rtx expand_divmod (int, enum tree_code, machine_mode, tree, tree, + rtx, rtx, rtx, int, + enum optab_methods = OPTAB_LIB_WIDEN); #endif #endif diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 8d7418be418406e72a895ecddf2dc7fdb950c76c..b64ea5ac46a9da85770a5bb0990db8b97d3af414 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -4222,8 +4222,8 @@ expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, 
HOST_WIDE_INT d) rtx expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, - rtx op0, rtx op1, rtx target, int unsignedp, - enum optab_methods methods) + tree treeop0, tree treeop1, rtx op0, rtx op1, rtx target, + int unsignedp, enum optab_methods methods) { machine_mode compute_mode; rtx tquotient; @@ -4375,6 +4375,14 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; + /* Check if the target has specific expansions for the division. */ + if (treeop0 + && targetm.vectorize.can_special_div_by_const (code, TREE_TYPE (treeop0), + treeop0, treeop1, + &target, op0, op1)) + return target; + + /* Now convert to the best mode to use. */ if (compute_mode != mode) { @@ -4618,8 +4626,8 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, || (optab_handler (sdivmod_optab, int_mode) != CODE_FOR_nothing))) quotient = expand_divmod (0, TRUNC_DIV_EXPR, - int_mode, op0, - gen_int_mode (abs_d, + int_mode, treeop0, treeop1, + op0, gen_int_mode (abs_d, int_mode), NULL_RTX, 0); else @@ -4808,8 +4816,8 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, size - 1, NULL_RTX, 0); t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), NULL_RTX); - t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1, - NULL_RTX, 0); + t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, treeop0, + treeop1, t3, op1, NULL_RTX, 0); if (t4) { rtx t5; diff --git a/gcc/expr.cc b/gcc/expr.cc index 80bb1b8a4c5b8350fb1b8f57a99fd52e5882fcb6..b786f1d75e25f3410c0640cd96a8abc055fa34d9 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -8028,16 +8028,17 @@ force_operand (rtx value, rtx target) return expand_divmod (0, FLOAT_MODE_P (GET_MODE (value)) ? 
RDIV_EXPR : TRUNC_DIV_EXPR, - GET_MODE (value), op1, op2, target, 0); + GET_MODE (value), NULL, NULL, op1, op2, + target, 0); case MOD: - return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2, - target, 0); + return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL, + op1, op2, target, 0); case UDIV: - return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), op1, op2, - target, 1); + return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), NULL, NULL, + op1, op2, target, 1); case UMOD: - return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2, - target, 1); + return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL, + op1, op2, target, 1); case ASHIFTRT: return expand_simple_binop (GET_MODE (value), code, op1, op2, target, 0, OPTAB_LIB_WIDEN); @@ -8990,11 +8991,13 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0, bool speed_p = optimize_insn_for_speed_p (); do_pending_stack_adjust (); start_sequence (); - rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1); + rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1, + op0, op1, target, 1); rtx_insn *uns_insns = get_insns (); end_sequence (); start_sequence (); - rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0); + rtx sgn_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1, + op0, op1, target, 0); rtx_insn *sgn_insns = get_insns (); end_sequence (); unsigned uns_cost = seq_cost (uns_insns, speed_p); @@ -9016,7 +9019,8 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0, emit_insn (sgn_insns); return sgn_ret; } - return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp); + return expand_divmod (mod_p, code, mode, treeop0, treeop1, + op0, op1, target, unsignedp); } rtx diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 165f8d1fa22432b96967c69a58dbb7b4bf18120d..cff37ccb0dfc3dd79b97d0abfd872f340855dc96 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -1104,8 
+1104,9 @@ expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp) return NULL_RTX; } } - rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, sum, - gen_int_mode (INTVAL (op1), word_mode), + rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, NULL, NULL, + sum, gen_int_mode (INTVAL (op1), + word_mode), NULL_RTX, 1, OPTAB_DIRECT); if (remainder == NULL_RTX) return NULL_RTX; @@ -1208,8 +1209,8 @@ expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem, if (op11 != const1_rtx) { - rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11, - NULL_RTX, unsignedp, OPTAB_DIRECT); + rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, NULL, NULL, quot1, + op11, NULL_RTX, unsignedp, OPTAB_DIRECT); if (rem2 == NULL_RTX) return NULL_RTX; @@ -1223,8 +1224,8 @@ expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem, if (rem2 == NULL_RTX) return NULL_RTX; - rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11, - NULL_RTX, unsignedp, OPTAB_DIRECT); + rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, NULL, NULL, quot1, + op11, NULL_RTX, unsignedp, OPTAB_DIRECT); if (quot2 == NULL_RTX) return NULL_RTX; diff --git a/gcc/target.def b/gcc/target.def index 2a7fa68f83dd15dcdd2c332e8431e6142ec7d305..92ebd2af18fe8abb6ed95b07081cdd70113db9b1 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1902,6 +1902,25 @@ implementation approaches itself.", const vec_perm_indices &sel), NULL) +DEFHOOK +(can_special_div_by_const, + "This hook is used to test whether the target has a special method of\n\ +division of vectors of type @var{vectype} using the two operands @code{treeop0},\n\ +and @code{treeop1} and producing a vector of type @var{vectype}. The division\n\ +will then not be decomposed by the vectorizer and kept as a div.\n\ +\n\ +When the hook is being used to test whether the target supports a special\n\ +divide, @var{in0}, @var{in1}, and @var{output} are all null. 
When the hook\n\ +is being used to emit a division, @var{in0} and @var{in1} are the source\n\ +vectors of type @var{vectype} and @var{output} is the destination vector of\n\ +type @var{vectype}.\n\ +\n\ +Return true if the operation is possible, emitting instructions for it\n\ +if rtxes are provided and updating @var{output}.", + bool, (enum tree_code, tree vectype, tree treeop0, tree treeop1, rtx *output, + rtx in0, rtx in1), + default_can_special_div_by_const) + /* Return true if the target supports misaligned store/load of a specific factor denoted in the third parameter. The last parameter is true if the access is defined in a packed struct. */ diff --git a/gcc/target.h b/gcc/target.h index d6fa6931499d15edff3e5af3e429540d001c7058..c836036ac7fa7910d62bd3da56f39c061f68b665 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -51,6 +51,7 @@ #include "insn-codes.h" #include "tm.h" #include "hard-reg-set.h" +#include "tree-core.h" #if CHECKING_P diff --git a/gcc/targhooks.h b/gcc/targhooks.h index ecce55ebe797cedc940620e8d89816973a045d49..42451a3e22e86fee9da2f56e2640d63f936b336d 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -207,6 +207,8 @@ extern void default_addr_space_diagnose_usage (addr_space_t, location_t); extern rtx default_addr_space_convert (rtx, tree, tree); extern unsigned int default_case_values_threshold (void); extern bool default_have_conditional_execution (void); +extern bool default_can_special_div_by_const (enum tree_code, tree, tree, tree, + rtx *, rtx, rtx); extern bool default_libc_has_function (enum function_class, tree); extern bool default_libc_has_fast_function (int fcode); diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc index b15ae19bcb60c59ae8112e67b5f06a241a9bdbf1..8206533382611a7640efba241279936ced41ee95 100644 --- a/gcc/targhooks.cc +++ b/gcc/targhooks.cc @@ -1807,6 +1807,14 @@ default_have_conditional_execution (void) return HAVE_conditional_execution; } +/* Default that no division by constant operations are special. 
*/ +bool +default_can_special_div_by_const (enum tree_code, tree, tree, tree, rtx *, rtx, + rtx) +{ + return false; +} + /* By default we assume that c99 functions are present at the runtime, but sincos is not. */ bool diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c new file mode 100644 index 0000000000000000000000000000000000000000..472cd710534bc8aa9b1b4916f3d7b4d5b64a19b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint8_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xff; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xff; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c new file mode 100644 index 0000000000000000000000000000000000000000..e904a71885b2e8487593a2cd3db75b3e4112e2cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint16_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xffffU; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xffffU; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { 
scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c new file mode 100644 index 0000000000000000000000000000000000000000..a1418ebbf5ea8731ed4e3e720157701d9d1cf852 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint32_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h new file mode 100644 index 0000000000000000000000000000000000000000..29a16739aa4b706616367bfd1832f28ebd07993e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h @@ -0,0 +1,43 @@ +#include <stdio.h> + +#ifndef N +#define N 65 +#endif + +#ifndef TYPE +#define TYPE uint32_t +#endif + +#ifndef DEBUG +#define DEBUG 0 +#endif + +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N / 2, N); + fun2 (b, N / 2, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 350129555a0c71c0896c4f1003163f3b3557c11b..ebee5e24b186915ebcb3a817c9a12046b6ec94f3 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1237,6 +1237,14 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type tree rhs2 = gimple_assign_rhs2 (assign); tree ret; + /* Check if the target was going to handle it through the special + division callback hook. */ + if (targetm.vectorize.can_special_div_by_const (code, type, rhs1, + rhs2, NULL, + NULL_RTX, NULL_RTX)) + return NULL_TREE; + + if (!optimize || !VECTOR_INTEGER_TYPE_P (type) || TREE_CODE (rhs2) != VECTOR_CST diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 09574bb1a2696b3438a4ce9f09f74b42e784aca0..607acdf95eb30335d8bc0e85af0b1bfea10fe443 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -3596,6 +3596,12 @@ vect_recog_divmod_pattern (vec_info *vinfo, return pattern_stmt; } + else if (targetm.vectorize.can_special_div_by_const (rhs_code, vectype, + oprnd0, oprnd1, NULL, + NULL_RTX, NULL_RTX)) + { + return NULL; + } if (prec > HOST_BITS_PER_WIDE_INT || integer_zerop (oprnd1)) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c9dab217f059f17e91e9a7582523e627d7a45b66..6d05c48a7339de094d7288bd68e0e1c1e93faafe 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -6260,6 +6260,11 @@ vectorizable_operation (vec_info *vinfo, } target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing); + if (!target_support_p) + 
target_support_p + = targetm.vectorize.can_special_div_by_const (code, vectype, + op0, op1, NULL, + NULL_RTX, NULL_RTX); } bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);