From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp-out1.suse.de (smtp-out1.suse.de [195.135.220.28]) by sourceware.org (Postfix) with ESMTPS id 8F82B385355D for ; Mon, 13 Jun 2022 09:24:03 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 8F82B385355D Received: from relay2.suse.de (relay2.suse.de [149.44.160.134]) by smtp-out1.suse.de (Postfix) with ESMTP id 727D921D7B; Mon, 13 Jun 2022 09:24:02 +0000 (UTC) Received: from [10.168.4.8] (unknown [10.168.4.8]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by relay2.suse.de (Postfix) with ESMTPS id 513972C142; Mon, 13 Jun 2022 09:24:02 +0000 (UTC) Date: Mon, 13 Jun 2022 11:24:02 +0200 (CEST) From: Richard Biener To: Tamar Christina cc: gcc-patches@gcc.gnu.org, nd@arm.com, richard.sandiford@arm.com Subject: Re: [PATCH 1/2]middle-end Support optimized division by pow2 bitmask In-Reply-To: Message-ID: References: MIME-Version: 1.0 X-Spam-Status: No, score=-10.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_LOTSOFHASH, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8BIT X-Content-Filtered-By: Mailman/MimeDel 2.1.29 X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 13 Jun 2022 09:24:05 -0000 On Thu, 9 Jun 2022, Tamar Christina wrote: > Hi All, > > In plenty of image and video processing code it's common to modify pixel values > by a widening operation and then scale them back into range by dividing by 255. 
> > This patch adds an optab to allow us to emit an optimized sequence when doing > an unsigned division that is equivalent to: > > x = y / (2 ^ (bitsize (y)/2) - 1) > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu > and no issues. > > Ok for master? Looking at 2/2 it seems that this is the wrong way to attack the problem. The ISA doesn't have such an instruction so adding an optab looks premature. I suppose that there's no unsigned vector integer division and thus we open-code that in a different way? Isn't the correct thing then to fix up that open-coding if it is more efficient? Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > * internal-fn.def (DIV_POW2_BITMASK): New. > * optabs.def (udiv_pow2_bitmask_optab): New. > * doc/md.texi: Document it. > * tree-vect-patterns.cc (vect_recog_divmod_pattern): Recognize pattern. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/vect-div-bitmask-1.c: New test. > * gcc.dg/vect/vect-div-bitmask-2.c: New test. > * gcc.dg/vect/vect-div-bitmask-3.c: New test. > * gcc.dg/vect/vect-div-bitmask.h: New file. > > --- inline copy of patch -- > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index f3619c505c025f158c2bc64756531877378b22e1..784c49d7d24cef7619e4d613f7b4f6e945866c38 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -5588,6 +5588,18 @@ signed op0, op1; > op0 = op1 / (1 << imm); > @end smallexample > > +@cindex @code{udiv_pow2_bitmask@var{m2}} instruction pattern > +@item @samp{udiv_pow2_bitmask@var{m2}} > +@cindex @code{udiv_pow2_bitmask@var{m2}} instruction pattern > +@itemx @samp{udiv_pow2_bitmask@var{m2}} > +Unsigned vector division by an immediate that is equivalent to > +@samp{2^(bitsize(m) / 2) - 1}. 
> +@smallexample > +unsigned short op0; op1; > +@dots{} > +op0 = op1 / 0xffU; > +@end smallexample > + > @cindex @code{vec_shl_insert_@var{m}} instruction pattern > @item @samp{vec_shl_insert_@var{m}} > Shift the elements in vector input operand 1 left one element (i.e.@: > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index d2d550d358606022b1cb44fa842f06e0be507bc3..a3e3cc1520f77683ebf6256898f916ed45de475f 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -159,6 +159,8 @@ DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, > vec_shl_insert, binary) > > DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary) > +DEF_INTERNAL_OPTAB_FN (DIV_POW2_BITMASK, ECF_CONST | ECF_NOTHROW, > + udiv_pow2_bitmask, unary) > > DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) > DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) > diff --git a/gcc/optabs.def b/gcc/optabs.def > index 801310ebaa7d469520809bb7efed6820f8eb866b..3f0ac05ef5ad5aed8d6ca391f4eed71b0494e17f 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -372,6 +372,7 @@ OPTAB_D (smulhrs_optab, "smulhrs$a3") > OPTAB_D (umulhs_optab, "umulhs$a3") > OPTAB_D (umulhrs_optab, "umulhrs$a3") > OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3") > +OPTAB_D (udiv_pow2_bitmask_optab, "udiv_pow2_bitmask$a2") > OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") > OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") > OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") > diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..a7ea3cce4764239c5d281a8f0bead1f6a452de3f > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-1.c > @@ -0,0 +1,25 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include > +#include "tree-vect.h" > + > +#define N 50 > +#define TYPE uint8_t > + > +__attribute__((noipa, noinline, 
optimize("O1"))) > +void fun1(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * level) / 0xff; > +} > + > +__attribute__((noipa, noinline, optimize("O3"))) > +void fun2(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * level) / 0xff; > +} > + > +#include "vect-div-bitmask.h" > + > +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c > new file mode 100644 > index 0000000000000000000000000000000000000000..009e16e1b36497e5724410d9843f1ce122b26dda > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-2.c > @@ -0,0 +1,25 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include > +#include "tree-vect.h" > + > +#define N 50 > +#define TYPE uint16_t > + > +__attribute__((noipa, noinline, optimize("O1"))) > +void fun1(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * level) / 0xffffU; > +} > + > +__attribute__((noipa, noinline, optimize("O3"))) > +void fun2(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * level) / 0xffffU; > +} > + > +#include "vect-div-bitmask.h" > + > +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c > new file mode 100644 > index 0000000000000000000000000000000000000000..bf35a0bda8333c418e692d94220df849cc47930b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-3.c > @@ -0,0 +1,26 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */ > + > +#include > +#include "tree-vect.h" > + > +#define N 50 > +#define TYPE uint32_t > + > 
+__attribute__((noipa, noinline, optimize("O1"))) > +void fun1(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; > +} > + > +__attribute__((noipa, noinline, optimize("O3"))) > +void fun2(TYPE* restrict pixel, TYPE level, int n) > +{ > + for (int i = 0; i < n; i+=1) > + pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL; > +} > + > +#include "vect-div-bitmask.h" > + > +/* { dg-final { scan-tree-dump "vect_recog_divmod_pattern: detected" "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h > new file mode 100644 > index 0000000000000000000000000000000000000000..29a16739aa4b706616367bfd1832f28ebd07993e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask.h > @@ -0,0 +1,43 @@ > +#include > + > +#ifndef N > +#define N 65 > +#endif > + > +#ifndef TYPE > +#define TYPE uint32_t > +#endif > + > +#ifndef DEBUG > +#define DEBUG 0 > +#endif > + > +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) > + > +int main () > +{ > + TYPE a[N]; > + TYPE b[N]; > + > + for (int i = 0; i < N; ++i) > + { > + a[i] = BASE + i * 13; > + b[i] = BASE + i * 13; > + if (DEBUG) > + printf ("%d: 0x%x\n", i, a[i]); > + } > + > + fun1 (a, N / 2, N); > + fun2 (b, N / 2, N); > + > + for (int i = 0; i < N; ++i) > + { > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); > + > + if (a[i] != b[i]) > + __builtin_abort (); > + } > + return 0; > +} > + > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 217bdfd7045a22578a35bb891a4318d741071872..a738558cb8d12296bff462d716310ca8d82957b5 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -3558,6 +3558,33 @@ vect_recog_divmod_pattern (vec_info *vinfo, > > return pattern_stmt; > } > + else if ((TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) > + && rhs_code != TRUNC_MOD_EXPR) > + { > + wide_int icst = wi::to_wide (oprnd1); > + wide_int val = wi::add (icst, 1); > + int pow = wi::exact_log2 (val); > + if (pow == (prec / 2)) > + { > + /* Pattern detected. */ > + vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt); > + > + *type_out = vectype; > + > + /* Check if the target supports this internal function. */ > + internal_fn ifn = IFN_DIV_POW2_BITMASK; > + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) > + { > + tree var_div = vect_recog_temp_ssa_var (itype, NULL); > + gimple *div_stmt = gimple_build_call_internal (ifn, 1, oprnd0); > + gimple_call_set_lhs (div_stmt, var_div); > + > + gimple_set_location (div_stmt, gimple_location (last_stmt)); > + > + return div_stmt; > + } > + } > + } > > if (prec > HOST_BITS_PER_WIDE_INT > || integer_zerop (oprnd1)) > > > > > -- Richard Biener SUSE Software Solutions Germany GmbH, Frankenstraße 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)