From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 55906 invoked by alias); 1 Dec 2015 10:53:09 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 55897 invoked by uid 89); 1 Dec 2015 10:53:08 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL,BAYES_00,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-yk0-f170.google.com Received: from mail-yk0-f170.google.com (HELO mail-yk0-f170.google.com) (209.85.160.170) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 01 Dec 2015 10:53:06 +0000 Received: by ykdr82 with SMTP id r82so2161136ykd.3 for ; Tue, 01 Dec 2015 02:53:04 -0800 (PST) MIME-Version: 1.0 X-Received: by 10.13.242.133 with SMTP id b127mr56837184ywf.280.1448967184571; Tue, 01 Dec 2015 02:53:04 -0800 (PST) Received: by 10.37.93.11 with HTTP; Tue, 1 Dec 2015 02:53:04 -0800 (PST) In-Reply-To: <87610iedjx.fsf@e105548-lin.cambridge.arm.com> References: <87610iedjx.fsf@e105548-lin.cambridge.arm.com> Date: Tue, 01 Dec 2015 10:53:00 -0000 Message-ID: Subject: Re: PR68577: Handle narrowing for vector popcount, etc. From: Richard Biener To: GCC Patches , richard.sandiford@arm.com Content-Type: text/plain; charset=UTF-8 X-IsSubscribed: yes X-SW-Source: 2015-12/txt/msg00062.txt.bz2 On Tue, Dec 1, 2015 at 10:14 AM, Richard Sandiford wrote: > This patch adds support for simple cases where a vector internal > function returns wider results than the scalar equivalent. It punts > on other cases. > > Tested on powerpc64-linux-gnu and x86_64-linux-gnu. OK to install? > > Thanks, > Richard > > > gcc/ > PR tree-optimization/68577 > * tree-vect-stmts.c (simple_integer_narrowing): New function. 
> (vectorizable_call): Restrict internal function handling > to NONE and NARROW cases, using simple_integer_narrowing > to test for the latter. Add cost of narrowing operation > and insert it where necessary. > > gcc/testsuite/ > PR tree-optimization/68577 > * gcc.dg/vect/pr68577.c: New test. > > diff --git a/gcc/testsuite/gcc.dg/vect/pr68577.c b/gcc/testsuite/gcc.dg/vect/pr68577.c > new file mode 100644 > index 0000000..999c1c8 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr68577.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile } */ > + > +int a, b; > + > +void > +__sched_cpucount (void) > +{ > + while (b) > + { > + long l = b++; > + a += __builtin_popcountl(l); > + } > +} > + > +void > +slp_test (int *x, long *y) > +{ > + for (int i = 0; i < 512; i += 4) > + { > + x[i] = __builtin_popcountl(y[i]); > + x[i + 1] = __builtin_popcountl(y[i + 1]); > + x[i + 2] = __builtin_popcountl(y[i + 2]); > + x[i + 3] = __builtin_popcountl(y[i + 3]); > + } > +} > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 3b078da..af86bce 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, > return true; > } > > +/* Return true if vector type VECTYPE_OUT has integer elements and > + if we can narrow two integer vectors with the same shape as > + VECTYPE_IN to VECTYPE_OUT in a single step. On success, > + return the binary pack code in *CONVERT_CODE and the types > + of the input vectors in *CONVERT_FROM. 
*/ > + > +static bool > +simple_integer_narrowing (tree vectype_out, tree vectype_in, > + tree_code *convert_code, tree *convert_from) > +{ > + if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))) > + return false; > + > + if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in))) > + { > + unsigned int bits > + = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in))); > + tree scalar_type = build_nonstandard_integer_type (bits, 0); > + vectype_in = get_same_sized_vectype (scalar_type, vectype_in); > + } > + Any reason for supporting non-integer types on the input? It seems to me you are doing this for the lrint case? If so, isn't the "question" wrong, and shouldn't you pass the integer type the IFN returns as vectype_in instead? That said, this conversion doesn't seem to belong in simple_integer_narrowing. The patch is OK with that conversion simply removed. Thanks, Richard. > + tree_code code; > + int multi_step_cvt = 0; > + auto_vec interm_types; > + if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, > + &code, &multi_step_cvt, > + &interm_types) > + || multi_step_cvt) > + return false; > + > + *convert_code = code; > + *convert_from = vectype_in; > + return true; > +} > > /* Function vectorizable_call. > > @@ -2288,7 +2322,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > tree callee = gimple_call_fndecl (stmt); > > /* First try using an internal function. 
*/ > - if (cfn != CFN_LAST) > + tree_code convert_code = ERROR_MARK; > + tree convert_from = NULL_TREE; > + if (cfn != CFN_LAST > + && (modifier == NONE > + || (modifier == NARROW > + && simple_integer_narrowing (vectype_out, vectype_in, > + &convert_code, &convert_from)))) > ifn = vectorizable_internal_function (cfn, callee, vectype_out, > vectype_in); > > @@ -2328,7 +2368,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > > if (slp_node || PURE_SLP_STMT (stmt_info)) > ncopies = 1; > - else if (modifier == NARROW) > + else if (modifier == NARROW && ifn == IFN_LAST) > ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; > else > ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; > @@ -2344,6 +2384,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===" > "\n"); > vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); > + if (ifn != IFN_LAST && modifier == NARROW && !slp_node) > + add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2, > + vec_promote_demote, stmt_info, 0, vect_body); > + > return true; > } > > @@ -2357,9 +2401,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > vec_dest = vect_create_destination_var (scalar_dest, vectype_out); > > prev_stmt_info = NULL; > - switch (modifier) > + if (modifier == NONE || ifn != IFN_LAST) > { > - case NONE: > + tree prev_res = NULL_TREE; > for (j = 0; j < ncopies; ++j) > { > /* Build argument list for the vectorized call. 
*/ > @@ -2387,12 +2431,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > vec vec_oprndsk = vec_defs[k]; > vargs[k] = vec_oprndsk[i]; > } > - if (ifn != IFN_LAST) > - new_stmt = gimple_build_call_internal_vec (ifn, vargs); > + if (modifier == NARROW) > + { > + tree half_res = make_ssa_name (convert_from); > + new_stmt = gimple_build_call_internal_vec (ifn, vargs); > + gimple_call_set_lhs (new_stmt, half_res); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + if ((i & 1) == 0) > + { > + prev_res = half_res; > + continue; > + } > + new_temp = make_ssa_name (vec_dest); > + new_stmt = gimple_build_assign (new_temp, convert_code, > + prev_res, half_res); > + } > else > - new_stmt = gimple_build_call_vec (fndecl, vargs); > - new_temp = make_ssa_name (vec_dest, new_stmt); > - gimple_call_set_lhs (new_stmt, new_temp); > + { > + if (ifn != IFN_LAST) > + new_stmt = gimple_build_call_internal_vec (ifn, vargs); > + else > + new_stmt = gimple_build_call_vec (fndecl, vargs); > + new_temp = make_ssa_name (vec_dest, new_stmt); > + gimple_call_set_lhs (new_stmt, new_temp); > + } > vect_finish_stmt_generation (stmt, new_stmt, gsi); > SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); > } > @@ -2436,6 +2498,21 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > new_temp = make_ssa_name (vec_dest); > new_stmt = gimple_build_assign (new_temp, new_var); > } > + else if (modifier == NARROW) > + { > + tree half_res = make_ssa_name (convert_from); > + new_stmt = gimple_build_call_internal_vec (ifn, vargs); > + gimple_call_set_lhs (new_stmt, half_res); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + if ((j & 1) == 0) > + { > + prev_res = half_res; > + continue; > + } > + new_temp = make_ssa_name (vec_dest); > + new_stmt = gimple_build_assign (new_temp, convert_code, > + prev_res, half_res); > + } > else > { > if (ifn != IFN_LAST) > @@ -2447,17 +2524,16 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator 
*gsi, gimple **vec_stmt, > } > vect_finish_stmt_generation (stmt, new_stmt, gsi); > > - if (j == 0) > + if (j == (modifier == NARROW ? 1 : 0)) > STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; > else > STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; > > prev_stmt_info = vinfo_for_stmt (new_stmt); > } > - > - break; > - > - case NARROW: > + } > + else if (modifier == NARROW) > + { > for (j = 0; j < ncopies; ++j) > { > /* Build argument list for the vectorized call. */ > @@ -2528,10 +2604,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > vargs.quick_push (vec_oprnd1); > } > > - if (ifn != IFN_LAST) > - new_stmt = gimple_build_call_internal_vec (ifn, vargs); > - else > - new_stmt = gimple_build_call_vec (fndecl, vargs); > + new_stmt = gimple_build_call_vec (fndecl, vargs); > new_temp = make_ssa_name (vec_dest, new_stmt); > gimple_call_set_lhs (new_stmt, new_temp); > vect_finish_stmt_generation (stmt, new_stmt, gsi); > @@ -2545,13 +2618,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, > } > > *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); > - > - break; > - > - case WIDEN: > - /* No current target implements this case. */ > - return false; > } > + else > + /* No current target implements this case. */ > + return false; > > vargs.release (); > >