diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index f3619c505c025f158c2bc64756531877378b22e1..784c49d7d24cef7619e4d613f7b4f6e945866c38 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5588,6 +5588,18 @@ signed op0, op1; op0 = op1 / (1 << imm); @end smallexample +@cindex @code{udiv_pow2_bitmask@var{m2}} instruction pattern +@item @samp{udiv_pow2_bitmask@var{m2}} +@cindex @code{udiv_pow2_bitmask@var{m2}} instruction pattern +@itemx @samp{udiv_pow2_bitmask@var{m2}} +Unsigned vector division by an immediate that is equivalent to +@samp{2^(bitsize(m) / 2) - 1}. +@smallexample +unsigned short op0, op1; +@dots{} +op0 = op1 / 0xffU; +@end smallexample + @cindex @code{vec_shl_insert_@var{m}} instruction pattern @item @samp{vec_shl_insert_@var{m}} Shift the elements in vector input operand 1 left one element (i.e.@: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index d2d550d358606022b1cb44fa842f06e0be507bc3..a3e3cc1520f77683ebf6256898f916ed45de475f 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -159,6 +159,8 @@ DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, vec_shl_insert, binary) DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary) +DEF_INTERNAL_OPTAB_FN (DIV_POW2_BITMASK, ECF_CONST | ECF_NOTHROW, + udiv_pow2_bitmask, unary) DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) diff --git a/gcc/optabs.def b/gcc/optabs.def index 801310ebaa7d469520809bb7efed6820f8eb866b..3f0ac05ef5ad5aed8d6ca391f4eed71b0494e17f 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -372,6 +372,7 @@ OPTAB_D (smulhrs_optab, "smulhrs$a3") OPTAB_D (umulhs_optab, "umulhs$a3") OPTAB_D (umulhrs_optab, "umulhrs$a3") OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3") +OPTAB_D (udiv_pow2_bitmask_optab, "udiv_pow2_bitmask$a2") OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c new file mode 100644 index 0000000000000000000000000000000000000000..a7ea3cce4764239c5d281a8f0bead1f6a452de3f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint8_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xff; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xff; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c new file mode 100644 index 0000000000000000000000000000000000000000..009e16e1b36497e5724410d9843f1ce122b26dda --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint16_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xffffU; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * level) / 0xffffU; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c new file mode 100644 index 
0000000000000000000000000000000000000000..bf35a0bda8333c418e692d94220df849cc47930b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint32_t + +__attribute__((noipa, noinline, optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h new file mode 100644 index 0000000000000000000000000000000000000000..29a16739aa4b706616367bfd1832f28ebd07993e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h @@ -0,0 +1,43 @@ +#include <stdio.h> + +#ifndef N +#define N 65 +#endif + +#ifndef TYPE +#define TYPE uint32_t +#endif + +#ifndef DEBUG +#define DEBUG 0 +#endif + +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N / 2, N); + fun2 (b, N / 2, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 217bdfd7045a22578a35bb891a4318d741071872..a738558cb8d12296bff462d716310ca8d82957b5 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -3558,6 +3558,33 @@ vect_recog_divmod_pattern (vec_info *vinfo, return pattern_stmt; } + else if ((TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) + && rhs_code != TRUNC_MOD_EXPR) + { + wide_int icst = wi::to_wide (oprnd1); + wide_int val = wi::add (icst, 1); + int pow = wi::exact_log2 (val); + if (pow == (prec / 2)) + { + /* Pattern detected. */ + vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt); + + *type_out = vectype; + + /* Check if the target supports this internal function. */ + internal_fn ifn = IFN_DIV_POW2_BITMASK; + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) + { + tree var_div = vect_recog_temp_ssa_var (itype, NULL); + gimple *div_stmt = gimple_build_call_internal (ifn, 1, oprnd0); + gimple_call_set_lhs (div_stmt, var_div); + + gimple_set_location (div_stmt, gimple_location (last_stmt)); + + return div_stmt; + } + } + } if (prec > HOST_BITS_PER_WIDE_INT || integer_zerop (oprnd1))