From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-patches-return-415886-listarch-gcc-patches=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 55906 invoked by alias); 1 Dec 2015 10:53:09 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Received: (qmail 55897 invoked by uid 89); 1 Dec 2015 10:53:08 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL,BAYES_00,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2
X-HELO: mail-yk0-f170.google.com
Received: from mail-yk0-f170.google.com (HELO mail-yk0-f170.google.com) (209.85.160.170) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 01 Dec 2015 10:53:06 +0000
Received: by ykdr82 with SMTP id r82so2161136ykd.3        for <gcc-patches@gcc.gnu.org>; Tue, 01 Dec 2015 02:53:04 -0800 (PST)
MIME-Version: 1.0
X-Received: by 10.13.242.133 with SMTP id b127mr56837184ywf.280.1448967184571; Tue, 01 Dec 2015 02:53:04 -0800 (PST)
Received: by 10.37.93.11 with HTTP; Tue, 1 Dec 2015 02:53:04 -0800 (PST)
In-Reply-To: <87610iedjx.fsf@e105548-lin.cambridge.arm.com>
References: <87610iedjx.fsf@e105548-lin.cambridge.arm.com>
Date: Tue, 01 Dec 2015 10:53:00 -0000
Message-ID: <CAFiYyc1VXMw=sPcQi+iJD70TgX88oFFn+1afdahSZb6usdz4Tw@mail.gmail.com>
Subject: Re: PR68577: Handle narrowing for vector popcount, etc.
From: Richard Biener <richard.guenther@gmail.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>, richard.sandiford@arm.com
Content-Type: text/plain; charset=UTF-8
X-IsSubscribed: yes
X-SW-Source: 2015-12/txt/msg00062.txt.bz2

On Tue, Dec 1, 2015 at 10:14 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch adds support for simple cases where a vector internal
> function returns wider results than the scalar equivalent.  It punts
> on other cases.
>
> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
>         PR tree-optimization/68577
>         * tree-vect-stmts.c (simple_integer_narrowing): New function.
>         (vectorizable_call): Restrict internal function handling
>         to NONE and NARROW cases, using simple_integer_narrowing
>         to test for the latter.  Add cost of narrowing operation
>         and insert it where necessary.
>
> gcc/testsuite/
>         PR tree-optimization/68577
>         * gcc.dg/vect/pr68577.c: New test.
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr68577.c b/gcc/testsuite/gcc.dg/vect/pr68577.c
> new file mode 100644
> index 0000000..999c1c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr68577.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +
> +int a, b;
> +
> +void
> +__sched_cpucount (void)
> +{
> +  while (b)
> +    {
> +      long l = b++;
> +      a += __builtin_popcountl(l);
> +    }
> +}
> +
> +void
> +slp_test (int *x, long *y)
> +{
> +  for (int i = 0; i < 512; i += 4)
> +    {
> +      x[i] = __builtin_popcountl(y[i]);
> +      x[i + 1] = __builtin_popcountl(y[i + 1]);
> +      x[i + 2] = __builtin_popcountl(y[i + 2]);
> +      x[i + 3] = __builtin_popcountl(y[i + 3]);
> +    }
> +}
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 3b078da..af86bce 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    return true;
>  }
>
> +/* Return true if vector type VECTYPE_OUT has integer elements and
> +   if we can narrow two integer vectors with the same shape as
> +   VECTYPE_IN to VECTYPE_OUT in a single step.  On success,
> +   return the binary pack code in *CONVERT_CODE and the types
> +   of the input vectors in *CONVERT_FROM.  */
> +
> +static bool
> +simple_integer_narrowing (tree vectype_out, tree vectype_in,
> +                         tree_code *convert_code, tree *convert_from)
> +{
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)))
> +    return false;
> +
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
> +    {
> +      unsigned int bits
> +       = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in)));
> +      tree scalar_type = build_nonstandard_integer_type (bits, 0);
> +      vectype_in = get_same_sized_vectype (scalar_type, vectype_in);
> +    }
> +

Any reason for supporting non-integer types on the input?  It seems to me
you are doing this for the lrint case.  If so, isn't the "question" wrong, and
shouldn't you pass the integer type the IFN returns as vectype_in instead?

That said, this conversion doesn't seem to belong to simple_integer_narrowing.

The patch is OK with that conversion simply removed.

Thanks,
Richard.

> +  tree_code code;
> +  int multi_step_cvt = 0;
> +  auto_vec <tree, 8> interm_types;
> +  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
> +                                       &code, &multi_step_cvt,
> +                                       &interm_types)
> +      || multi_step_cvt)
> +    return false;
> +
> +  *convert_code = code;
> +  *convert_from = vectype_in;
> +  return true;
> +}
>
>  /* Function vectorizable_call.
>
> @@ -2288,7 +2322,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    tree callee = gimple_call_fndecl (stmt);
>
>    /* First try using an internal function.  */
> -  if (cfn != CFN_LAST)
> +  tree_code convert_code = ERROR_MARK;
> +  tree convert_from = NULL_TREE;
> +  if (cfn != CFN_LAST
> +      && (modifier == NONE
> +         || (modifier == NARROW
> +             && simple_integer_narrowing (vectype_out, vectype_in,
> +                                          &convert_code, &convert_from))))
>      ifn = vectorizable_internal_function (cfn, callee, vectype_out,
>                                           vectype_in);
>
> @@ -2328,7 +2368,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>
>    if (slp_node || PURE_SLP_STMT (stmt_info))
>      ncopies = 1;
> -  else if (modifier == NARROW)
> +  else if (modifier == NARROW && ifn == IFN_LAST)
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>    else
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -2344,6 +2384,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>          dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
>                           "\n");
>        vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
> +      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
> +       add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
> +                      vec_promote_demote, stmt_info, 0, vect_body);
> +
>        return true;
>      }
>
> @@ -2357,9 +2401,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
>
>    prev_stmt_info = NULL;
> -  switch (modifier)
> +  if (modifier == NONE || ifn != IFN_LAST)
>      {
> -    case NONE:
> +      tree prev_res = NULL_TREE;
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2387,12 +2431,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                       vec<tree> vec_oprndsk = vec_defs[k];
>                       vargs[k] = vec_oprndsk[i];
>                     }
> -                 if (ifn != IFN_LAST)
> -                   new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                 if (modifier == NARROW)
> +                   {
> +                     tree half_res = make_ssa_name (convert_from);
> +                     new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     gimple_call_set_lhs (new_stmt, half_res);
> +                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                     if ((i & 1) == 0)
> +                       {
> +                         prev_res = half_res;
> +                         continue;
> +                       }
> +                     new_temp = make_ssa_name (vec_dest);
> +                     new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                                     prev_res, half_res);
> +                   }
>                   else
> -                   new_stmt = gimple_build_call_vec (fndecl, vargs);
> -                 new_temp = make_ssa_name (vec_dest, new_stmt);
> -                 gimple_call_set_lhs (new_stmt, new_temp);
> +                   {
> +                     if (ifn != IFN_LAST)
> +                       new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     else
> +                       new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                     new_temp = make_ssa_name (vec_dest, new_stmt);
> +                     gimple_call_set_lhs (new_stmt, new_temp);
> +                   }
>                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
>                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>                 }
> @@ -2436,6 +2498,21 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               new_temp = make_ssa_name (vec_dest);
>               new_stmt = gimple_build_assign (new_temp, new_var);
>             }
> +         else if (modifier == NARROW)
> +           {
> +             tree half_res = make_ssa_name (convert_from);
> +             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +             gimple_call_set_lhs (new_stmt, half_res);
> +             vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +             if ((j & 1) == 0)
> +               {
> +                 prev_res = half_res;
> +                 continue;
> +               }
> +             new_temp = make_ssa_name (vec_dest);
> +             new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                             prev_res, half_res);
> +           }
>           else
>             {
>               if (ifn != IFN_LAST)
> @@ -2447,17 +2524,16 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>             }
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
>
> -         if (j == 0)
> +         if (j == (modifier == NARROW ? 1 : 0))
>             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
>           else
>             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
>
>           prev_stmt_info = vinfo_for_stmt (new_stmt);
>         }
> -
> -      break;
> -
> -    case NARROW:
> +    }
> +  else if (modifier == NARROW)
> +    {
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2528,10 +2604,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               vargs.quick_push (vec_oprnd1);
>             }
>
> -         if (ifn != IFN_LAST)
> -           new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> -         else
> -           new_stmt = gimple_build_call_vec (fndecl, vargs);
> +         new_stmt = gimple_build_call_vec (fndecl, vargs);
>           new_temp = make_ssa_name (vec_dest, new_stmt);
>           gimple_call_set_lhs (new_stmt, new_temp);
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
> @@ -2545,13 +2618,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>         }
>
>        *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
> -
> -      break;
> -
> -    case WIDEN:
> -      /* No current target implements this case.  */
> -      return false;
>      }
> +  else
> +    /* No current target implements this case.  */
> +    return false;
>
>    vargs.release ();
>
>