diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 7235d34c4b30949febfa10d5a626ac9358281cfa..02004c4b0f4d88dffe980f7408038595e21af35d 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5668,6 +5668,18 @@ represented in RTL using a @code{smul_highpart} RTX expression. Similar, but the multiplication is unsigned. This may be represented in RTL using an @code{umul_highpart} RTX expression. +@cindex @code{sadd@var{m}3_highpart} instruction pattern +@item @samp{smul@var{m}3_highpart} +Perform a signed addition of operands 1 and 2, which have mode +@var{m}, and store the most significant half of the product in operand 0. +The least significant half of the product is discarded. This may be +represented in RTL using a @code{sadd_highpart} RTX expression. + +@cindex @code{uadd@var{m}3_highpart} instruction pattern +@item @samp{uadd@var{m}3_highpart} +Similar, but the addition is unsigned. This may be represented +in RTL using an @code{uadd_highpart} RTX expression. + @cindex @code{madd@var{m}@var{n}4} instruction pattern @item @samp{madd@var{m}@var{n}4} Multiply operands 1 and 2, sign-extend them to mode @var{n}, add diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi index d1380e1eb3ba6b2853686f41f2bf937bfcbed1fe..63a7ef6e566eeea4f14c00343d171940ec4222f3 100644 --- a/gcc/doc/rtl.texi +++ b/gcc/doc/rtl.texi @@ -2535,6 +2535,17 @@ out in machine mode @var{m}. @code{smul_highpart} returns the high part of a signed multiplication, @code{umul_highpart} returns the high part of an unsigned multiplication. +@findex sadd_highpart +@findex uadd_highpart +@cindex high-part addition +@cindex addition high part +@item (sadd_highpart:@var{m} @var{x} @var{y}) +@itemx (uadd_highpart:@var{m} @var{x} @var{y}) +Represents the high-part addition of @var{x} and @var{y} carried +out in machine mode @var{m}. @code{sadd_highpart} returns the high part +of a signed addition, @code{uadd_highpart} returns the high part +of an unsigned addition. + @findex fma @cindex fused multiply-add @item (fma:@var{m} @var{x} @var{y} @var{z}) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c6c891972d1e58cd163b259ba96a599d62326865..3ab2031a336b8758d5791484017e6b0d62ab077e 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6137,22 +6137,6 @@ instruction pattern. There is no need for the hook to handle these two implementation approaches itself. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST (enum @var{tree_code}, tree @var{vectype}, wide_int @var{constant}, rtx *@var{output}, rtx @var{in0}, rtx @var{in1}) -This hook is used to test whether the target has a special method of -division of vectors of type @var{vectype} using the value @var{constant}, -and producing a vector of type @var{vectype}. The division -will then not be decomposed by the vectorizer and kept as a div. - -When the hook is being used to test whether the target supports a special -divide, @var{in0}, @var{in1}, and @var{output} are all null. When the hook -is being used to emit a division, @var{in0} and @var{in1} are the source -vectors of type @var{vecttype} and @var{output} is the destination vector of -type @var{vectype}. - -Return true if the operation is possible, emitting instructions for it -if rtxes are provided and updating @var{output}. -@end deftypefn - @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) This hook should return the decl of a function that implements the vectorized variant of the function with the @code{combined_fn} code diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 613b2534149415f442163d599503efaf423b673b..8790f4e44b98b51ad5d1efec0a3abccd1c293c7b 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4173,8 +4173,6 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_VEC_PERM_CONST -@hook TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST - @hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION @hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION diff --git a/gcc/explow.cc b/gcc/explow.cc index 83439b32abe1b9aa4b7983eb629804f97486acbd..be9195b33323ee5597fc212f0befa016eea4573c 100644 --- a/gcc/explow.cc +++ b/gcc/explow.cc @@ -1037,7 +1037,7 @@ round_push (rtx size) TRUNC_DIV_EXPR. */ size = expand_binop (Pmode, add_optab, size, alignm1_rtx, NULL_RTX, 1, OPTAB_LIB_WIDEN); - size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, size, align_rtx, + size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx, NULL_RTX, 1); size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1); @@ -1203,7 +1203,7 @@ align_dynamic_address (rtx target, unsigned required_align) gen_int_mode (required_align / BITS_PER_UNIT - 1, Pmode), NULL_RTX, 1, OPTAB_LIB_WIDEN); - target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, target, + target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target, gen_int_mode (required_align / BITS_PER_UNIT, Pmode), NULL_RTX, 1); diff --git a/gcc/expmed.h b/gcc/expmed.h index 0419e2dac85850889ce0bee59515e31a80c582de..4dfe635c22ee49f2dba4c53640941628068f3901 100644 --- a/gcc/expmed.h +++ b/gcc/expmed.h @@ -710,9 +710,8 @@ extern rtx expand_shift (enum tree_code, machine_mode, rtx, poly_int64, rtx, extern rtx maybe_expand_shift (enum tree_code, machine_mode, rtx, int, rtx, int); #ifdef GCC_OPTABS_H -extern rtx expand_divmod (int, enum tree_code, machine_mode, tree, tree, - rtx, rtx, rtx, int, - enum optab_methods = OPTAB_LIB_WIDEN); +extern rtx expand_divmod (int, enum tree_code, machine_mode, rtx, rtx, + rtx, int, enum optab_methods = OPTAB_LIB_WIDEN); #endif #endif diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 917360199ca56157cf3c3693b65e93cd9d8ed244..1553ea8e31eb6433025ab18a3a59c169d3b7692f 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -4222,8 +4222,8 @@ expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) rtx expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, - tree treeop0, tree treeop1, rtx op0, rtx op1, rtx target, - int unsignedp, enum optab_methods methods) + rtx op0, rtx op1, rtx target, int unsignedp, + enum optab_methods methods) { machine_mode compute_mode; rtx tquotient; @@ -4375,17 +4375,6 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; - /* Check if the target has specific expansions for the division. */ - tree cst; - if (treeop0 - && treeop1 - && (cst = uniform_integer_cst_p (treeop1)) - && targetm.vectorize.can_special_div_by_const (code, TREE_TYPE (treeop0), - wi::to_wide (cst), - &target, op0, op1)) - return target; - - /* Now convert to the best mode to use. */ if (compute_mode != mode) { @@ -4629,8 +4618,8 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, || (optab_handler (sdivmod_optab, int_mode) != CODE_FOR_nothing))) quotient = expand_divmod (0, TRUNC_DIV_EXPR, - int_mode, treeop0, treeop1, - op0, gen_int_mode (abs_d, + int_mode, op0, + gen_int_mode (abs_d, int_mode), NULL_RTX, 0); else @@ -4819,8 +4808,8 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, size - 1, NULL_RTX, 0); t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), NULL_RTX); - t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, treeop0, - treeop1, t3, op1, NULL_RTX, 0); + t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1, + NULL_RTX, 0); if (t4) { rtx t5; diff --git a/gcc/expr.cc b/gcc/expr.cc index 15be1c8db999103bb9e5fa33daa44ae06de5ace8..78d35297e755216339078d5b2280c6e277f26d72 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -8207,17 +8207,16 @@ force_operand (rtx value, rtx target) return expand_divmod (0, FLOAT_MODE_P (GET_MODE (value)) ? RDIV_EXPR : TRUNC_DIV_EXPR, - GET_MODE (value), NULL, NULL, op1, op2, - target, 0); + GET_MODE (value), op1, op2, target, 0); case MOD: - return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL, - op1, op2, target, 0); + return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2, + target, 0); case UDIV: - return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), NULL, NULL, - op1, op2, target, 1); + return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), op1, op2, + target, 1); case UMOD: - return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL, - op1, op2, target, 1); + return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2, + target, 1); case ASHIFTRT: return expand_simple_binop (GET_MODE (value), code, op1, op2, target, 0, OPTAB_LIB_WIDEN); @@ -9170,13 +9169,11 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0, bool speed_p = optimize_insn_for_speed_p (); do_pending_stack_adjust (); start_sequence (); - rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1, - op0, op1, target, 1); + rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1); rtx_insn *uns_insns = get_insns (); end_sequence (); start_sequence (); - rtx sgn_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1, - op0, op1, target, 0); + rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0); rtx_insn *sgn_insns = get_insns (); end_sequence (); unsigned uns_cost = seq_cost (uns_insns, speed_p); @@ -9198,8 +9195,7 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0, emit_insn (sgn_insns); return sgn_ret; } - return expand_divmod (mod_p, code, mode, treeop0, treeop1, - op0, op1, target, unsignedp); + return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp); } rtx diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 22b4a2d92967076c658965afcaeaf39b449a8caf..2796d3669a0806538052584f5a3b8a734baa800f 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -174,6 +174,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first, DEF_INTERNAL_SIGNED_OPTAB_FN (MULH, ECF_CONST | ECF_NOTHROW, first, smul_highpart, umul_highpart, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (ADDH, ECF_CONST | ECF_NOTHROW, first, + sadd_highpart, uadd_highpart, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, smulhs, umulhs, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, diff --git a/gcc/optabs.cc b/gcc/optabs.cc index cf22bfec3f5513f56d22c866231edbf322ff6945..474ccbd7915b4f144cebe0369a6e77082c1e617b 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -1106,9 +1106,8 @@ expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp) return NULL_RTX; } } - rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, NULL, NULL, - sum, gen_int_mode (INTVAL (op1), - word_mode), + rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, sum, + gen_int_mode (INTVAL (op1), word_mode), NULL_RTX, 1, OPTAB_DIRECT); if (remainder == NULL_RTX) return NULL_RTX; @@ -1211,8 +1210,8 @@ expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem, if (op11 != const1_rtx) { - rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, NULL, NULL, quot1, - op11, NULL_RTX, unsignedp, OPTAB_DIRECT); + rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11, + NULL_RTX, unsignedp, OPTAB_DIRECT); if (rem2 == NULL_RTX) return NULL_RTX; @@ -1226,8 +1225,8 @@ expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem, if (rem2 == NULL_RTX) return NULL_RTX; - rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, NULL, NULL, quot1, - op11, NULL_RTX, unsignedp, OPTAB_DIRECT); + rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11, + NULL_RTX, unsignedp, OPTAB_DIRECT); if (quot2 == NULL_RTX) return NULL_RTX; diff --git a/gcc/optabs.def b/gcc/optabs.def index 695f5911b300c9ca5737de9be809fa01aabe5e01..77a152ec2d1949deca2c2d7a5ccbf6147947351a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -265,6 +265,8 @@ OPTAB_D (spaceship_optab, "spaceship$a3") OPTAB_D (smul_highpart_optab, "smul$a3_highpart") OPTAB_D (umul_highpart_optab, "umul$a3_highpart") +OPTAB_D (sadd_highpart_optab, "sadd$a3_highpart") +OPTAB_D (uadd_highpart_optab, "uadd$a3_highpart") OPTAB_D (cmpmem_optab, "cmpmem$a") OPTAB_D (cmpstr_optab, "cmpstr$a") diff --git a/gcc/target.def b/gcc/target.def index db8af0cbe81624513f114fc9bbd8be61d855f409..e0a5c7adbd962f5d08ed08d1d81afa2c2baa64a5 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1905,25 +1905,6 @@ implementation approaches itself.", const vec_perm_indices &sel), NULL) -DEFHOOK -(can_special_div_by_const, - "This hook is used to test whether the target has a special method of\n\ -division of vectors of type @var{vectype} using the value @var{constant},\n\ -and producing a vector of type @var{vectype}. The division\n\ -will then not be decomposed by the vectorizer and kept as a div.\n\ -\n\ -When the hook is being used to test whether the target supports a special\n\ -divide, @var{in0}, @var{in1}, and @var{output} are all null. When the hook\n\ -is being used to emit a division, @var{in0} and @var{in1} are the source\n\ -vectors of type @var{vecttype} and @var{output} is the destination vector of\n\ -type @var{vectype}.\n\ -\n\ -Return true if the operation is possible, emitting instructions for it\n\ -if rtxes are provided and updating @var{output}.", - bool, (enum tree_code, tree vectype, wide_int constant, rtx *output, - rtx in0, rtx in1), - default_can_special_div_by_const) - /* Return true if the target supports misaligned store/load of a specific factor denoted in the third parameter. The last parameter is true if the access is defined in a packed struct. */ diff --git a/gcc/target.h b/gcc/target.h index 03fd03a52075b4836159035ec14078c0aebdd7e9..93691882757232c514fca82b99f913158c2d47b1 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -51,7 +51,6 @@ #include "insn-codes.h" #include "tm.h" #include "hard-reg-set.h" -#include "tree-core.h" #if CHECKING_P diff --git a/gcc/targhooks.h b/gcc/targhooks.h index a1df260f5483dc84f18d8f12c5202484a32d5bb7..a6a4809ca91baa5d7fad2244549317a31390f0c2 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -209,8 +209,6 @@ extern void default_addr_space_diagnose_usage (addr_space_t, location_t); extern rtx default_addr_space_convert (rtx, tree, tree); extern unsigned int default_case_values_threshold (void); extern bool default_have_conditional_execution (void); -extern bool default_can_special_div_by_const (enum tree_code, tree, wide_int, - rtx *, rtx, rtx); extern bool default_libc_has_function (enum function_class, tree); extern bool default_libc_has_fast_function (int fcode); diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc index fe0116521feaf32187e7bc113bf93b1805852c79..211525720a620d6f533e2da91e03877337a931e7 100644 --- a/gcc/targhooks.cc +++ b/gcc/targhooks.cc @@ -1840,14 +1840,6 @@ default_have_conditional_execution (void) return HAVE_conditional_execution; } -/* Default that no division by constant operations are special. */ -bool -default_can_special_div_by_const (enum tree_code, tree, wide_int, rtx *, rtx, - rtx) -{ - return false; -} - /* By default we assume that c99 functions are present at the runtime, but sincos is not. */ bool diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c new file mode 100644 index 0000000000000000000000000000000000000000..c81f8946922250234bf759e0a0a04ea8c1f73e3c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +typedef unsigned __attribute__((__vector_size__ (16))) V; + +static __attribute__((__noinline__)) __attribute__((__noclone__)) V +foo (V v, unsigned short i) +{ + v /= i; + return v; +} + +int +main (void) +{ + V v = foo ((V) { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, 0xffff); + for (unsigned i = 0; i < sizeof (v) / sizeof (v[0]); i++) + if (v[i] != 0x00010001) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-5.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-5.c new file mode 100644 index 0000000000000000000000000000000000000000..b4eb1a4dacba481e6306b49914d2a29b933de625 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-5.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +#define N 50 +#define TYPE uint8_t + +#ifndef DEBUG +#define DEBUG 0 +#endif + +#define BASE ((TYPE) -1 < 0 ? -126 : 4) + + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] + level) / 0xff; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] + level) / 0xff; +} + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N / 2, N); + fun2 (b, N / 2, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump "divmod pattern recognized" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 166a248f4b9512d4c6fc8d760b458b7a467f7790..519a824ec727d4d4f28c14077dc3e970bed75ef6 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1237,17 +1237,6 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type tree rhs2 = gimple_assign_rhs2 (assign); tree ret; - /* Check if the target was going to handle it through the special - division callback hook. */ - tree cst = uniform_integer_cst_p (rhs2); - if (cst && - targetm.vectorize.can_special_div_by_const (code, type, - wi::to_wide (cst), - NULL, - NULL_RTX, NULL_RTX)) - return NULL_TREE; - - if (!optimize || !VECTOR_INTEGER_TYPE_P (type) || TREE_CODE (rhs2) != VECTOR_CST diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 6934aebc69f231af24668f0a1c3d140e97f55487..e39d7e6b362ef44eb2fc467f3369de2afea139d6 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -3914,12 +3914,82 @@ vect_recog_divmod_pattern (vec_info *vinfo, return pattern_stmt; } else if ((cst = uniform_integer_cst_p (oprnd1)) - && targetm.vectorize.can_special_div_by_const (rhs_code, vectype, - wi::to_wide (cst), - NULL, NULL_RTX, - NULL_RTX)) + && TYPE_UNSIGNED (itype) + && rhs_code == TRUNC_DIV_EXPR + && vectype + && direct_internal_fn_supported_p (IFN_ADDH, vectype, + OPTIMIZE_FOR_SPEED)) { - return NULL; + /* div optimizations using narrowings + we can do the division e.g. shorts by 255 faster by calculating it as + (x + ((x + 257) >> 8)) >> 8 assuming the operation is done in + double the precision of x. + + If we imagine a short as being composed of two blocks of bytes then + adding 257 or 0b0000_0001_0000_0001 to the number is equivalent to + adding 1 to each sub component: + + short value of 16-bits + ┌──────────────┬────────────────┐ + │ │ │ + └──────────────┴────────────────┘ + 8-bit part1 ▲ 8-bit part2 ▲ + │ │ + │ │ + +1 +1 + + after the first addition, we have to shift right by 8, and narrow the + results back to a byte. Remember that the addition must be done in + double the precision of the input. However if we know that the addition + `x + 257` does not overflow then we can do the operation in the current + precision. In which case we don't need the pack and unpacks. */ + auto wcst = wi::to_wide (cst); + int pow = wi::exact_log2 (wcst + 1); + if (pow == (int) (element_precision (vectype) / 2)) + { + wide_int min,max; + /* If we're in a pattern we need to find the orginal definition. */ + tree op0 = oprnd0; + gimple *stmt = SSA_NAME_DEF_STMT (oprnd0); + stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); + if (is_pattern_stmt_p (stmt_info)) + { + auto orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (is_gimple_assign (STMT_VINFO_STMT (orig_stmt))) + op0 = gimple_assign_lhs (STMT_VINFO_STMT (orig_stmt)); + } + + /* Check that no overflow will occur. If we don't have range + information we can't perform the optimization. */ + if (vect_get_range_info (op0, &min, &max)) + { + wide_int one = wi::to_wide (build_one_cst (itype)); + wide_int adder = wi::add (one, wi::lshift (one, pow)); + wi::overflow_type ovf; + /* We need adder and max in the same precision. */ + wide_int zadder + = wide_int_storage::from (adder, wi::get_precision (max), + UNSIGNED); + wi::add (max, zadder, UNSIGNED, &ovf); + if (ovf == wi::OVF_NONE) + { + *type_out = vectype; + tree tadder = wide_int_to_tree (itype, adder); + gcall *patt1 + = gimple_build_call_internal (IFN_ADDH, 2, oprnd0, tadder); + tree lhs = vect_recog_temp_ssa_var (itype, NULL); + gimple_call_set_lhs (patt1, lhs); + append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype); + + pattern_stmt + = gimple_build_call_internal (IFN_ADDH, 2, oprnd0, lhs); + lhs = vect_recog_temp_ssa_var (itype, NULL); + gimple_call_set_lhs (pattern_stmt, lhs); + + return pattern_stmt; + } + } + } } if (prec > HOST_BITS_PER_WIDE_INT diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index eb4ca1f184e374d177eb43d5eb93acf6e6a8fde9..3a0fb5ad898ad42c3867f0b9564fc4e066e50081 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -6263,15 +6263,6 @@ vectorizable_operation (vec_info *vinfo, } target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing); - tree cst; - if (!target_support_p - && op1 - && (cst = uniform_integer_cst_p (op1))) - target_support_p - = targetm.vectorize.can_special_div_by_const (code, vectype, - wi::to_wide (cst), - NULL, NULL_RTX, - NULL_RTX); } bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);