public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* PR68577: Handle narrowing for vector popcount, etc.
@ 2015-12-01  9:14 Richard Sandiford
  2015-12-01 10:53 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Sandiford @ 2015-12-01  9:14 UTC (permalink / raw)
  To: gcc-patches

This patch adds support for simple cases where a vector internal
function returns wider results than the scalar equivalent.  It punts
on other cases.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
	PR tree-optimization/68577
	* tree-vect-stmts.c (simple_integer_narrowing): New function.
	(vectorizable_call): Restrict internal function handling
	to NONE and NARROW cases, using simple_integer_narrowing
	to test for the latter.  Add cost of narrowing operation
	and insert it where necessary.

gcc/testsuite/
	PR tree-optimization/68577
	* gcc.dg/vect/pr68577.c: New test.

diff --git a/gcc/testsuite/gcc.dg/vect/pr68577.c b/gcc/testsuite/gcc.dg/vect/pr68577.c
new file mode 100644
index 0000000..999c1c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr68577.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+
+int a, b;
+
+void
+__sched_cpucount (void)
+{
+  while (b)
+    {
+      long l = b++;
+      a += __builtin_popcountl(l);
+    }
+}
+
+void
+slp_test (int *x, long *y)
+{
+  for (int i = 0; i < 512; i += 4)
+    {
+      x[i] = __builtin_popcountl(y[i]);
+      x[i + 1] = __builtin_popcountl(y[i + 1]);
+      x[i + 2] = __builtin_popcountl(y[i + 2]);
+      x[i + 3] = __builtin_popcountl(y[i + 3]);
+    }
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3b078da..af86bce 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Return true if vector type VECTYPE_OUT has integer elements and
+   if we can narrow two integer vectors with the same shape as
+   VECTYPE_IN to VECTYPE_OUT in a single step.  On success,
+   return the binary pack code in *CONVERT_CODE and the types
+   of the input vectors in *CONVERT_FROM.  */
+
+static bool
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
+			  tree_code *convert_code, tree *convert_from)
+{
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)))
+    return false;
+
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
+    {
+      unsigned int bits
+	= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in)));
+      tree scalar_type = build_nonstandard_integer_type (bits, 0);
+      vectype_in = get_same_sized_vectype (scalar_type, vectype_in);
+    }
+
+  tree_code code;
+  int multi_step_cvt = 0;
+  auto_vec <tree, 8> interm_types;
+  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+					&code, &multi_step_cvt,
+					&interm_types)
+      || multi_step_cvt)
+    return false;
+
+  *convert_code = code;
+  *convert_from = vectype_in;
+  return true;
+}
 
 /* Function vectorizable_call.
 
@@ -2288,7 +2322,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   tree callee = gimple_call_fndecl (stmt);
 
   /* First try using an internal function.  */
-  if (cfn != CFN_LAST)
+  tree_code convert_code = ERROR_MARK;
+  tree convert_from = NULL_TREE;
+  if (cfn != CFN_LAST
+      && (modifier == NONE
+	  || (modifier == NARROW
+	      && simple_integer_narrowing (vectype_out, vectype_in,
+					   &convert_code, &convert_from))))
     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
 					  vectype_in);
 
@@ -2328,7 +2368,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
   if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
-  else if (modifier == NARROW)
+  else if (modifier == NARROW && ifn == IFN_LAST)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2344,6 +2384,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                          "\n");
       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
+      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+	add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
+		       vec_promote_demote, stmt_info, 0, vect_body);
+
       return true;
     }
 
@@ -2357,9 +2401,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   prev_stmt_info = NULL;
-  switch (modifier)
+  if (modifier == NONE || ifn != IFN_LAST)
     {
-    case NONE:
+      tree prev_res = NULL_TREE;
       for (j = 0; j < ncopies; ++j)
 	{
 	  /* Build argument list for the vectorized call.  */
@@ -2387,12 +2431,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		      vec<tree> vec_oprndsk = vec_defs[k];
 		      vargs[k] = vec_oprndsk[i];
 		    }
-		  if (ifn != IFN_LAST)
-		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		  if (modifier == NARROW)
+		    {
+		      tree half_res = make_ssa_name (convert_from);
+		      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		      gimple_call_set_lhs (new_stmt, half_res);
+		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		      if ((i & 1) == 0)
+			{
+			  prev_res = half_res;
+			  continue;
+			}
+		      new_temp = make_ssa_name (vec_dest);
+		      new_stmt = gimple_build_assign (new_temp, convert_code,
+						      prev_res, half_res);
+		    }
 		  else
-		    new_stmt = gimple_build_call_vec (fndecl, vargs);
-		  new_temp = make_ssa_name (vec_dest, new_stmt);
-		  gimple_call_set_lhs (new_stmt, new_temp);
+		    {
+		      if (ifn != IFN_LAST)
+			new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		      else
+			new_stmt = gimple_build_call_vec (fndecl, vargs);
+		      new_temp = make_ssa_name (vec_dest, new_stmt);
+		      gimple_call_set_lhs (new_stmt, new_temp);
+		    }
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
 		}
@@ -2436,6 +2498,21 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	      new_temp = make_ssa_name (vec_dest);
 	      new_stmt = gimple_build_assign (new_temp, new_var);
 	    }
+	  else if (modifier == NARROW)
+	    {
+	      tree half_res = make_ssa_name (convert_from);
+	      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+	      gimple_call_set_lhs (new_stmt, half_res);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      if ((j & 1) == 0)
+		{
+		  prev_res = half_res;
+		  continue;
+		}
+	      new_temp = make_ssa_name (vec_dest);
+	      new_stmt = gimple_build_assign (new_temp, convert_code,
+					      prev_res, half_res);
+	    }
 	  else
 	    {
 	      if (ifn != IFN_LAST)
@@ -2447,17 +2524,16 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	    }
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-	  if (j == 0)
+	  if (j == (modifier == NARROW ? 1 : 0))
 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
 	  else
 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
 
 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
 	}
-
-      break;
-
-    case NARROW:
+    }
+  else if (modifier == NARROW)
+    {
       for (j = 0; j < ncopies; ++j)
 	{
 	  /* Build argument list for the vectorized call.  */
@@ -2528,10 +2604,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	      vargs.quick_push (vec_oprnd1);
 	    }
 
-	  if (ifn != IFN_LAST)
-	    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
-	  else
-	    new_stmt = gimple_build_call_vec (fndecl, vargs);
+	  new_stmt = gimple_build_call_vec (fndecl, vargs);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
 	  gimple_call_set_lhs (new_stmt, new_temp);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2545,13 +2618,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	}
 
       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-
-      break;
-
-    case WIDEN:
-      /* No current target implements this case.  */
-      return false;
     }
+  else
+    /* No current target implements this case.  */
+    return false;
 
   vargs.release ();
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PR68577: Handle narrowing for vector popcount, etc.
  2015-12-01  9:14 PR68577: Handle narrowing for vector popcount, etc Richard Sandiford
@ 2015-12-01 10:53 ` Richard Biener
  2015-12-01 21:04   ` Richard Sandiford
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2015-12-01 10:53 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

On Tue, Dec 1, 2015 at 10:14 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch adds support for simple cases where a vector internal
> function returns wider results than the scalar equivalent.  It punts
> on other cases.
>
> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
>         PR tree-optimization/68577
>         * tree-vect-stmts.c (simple_integer_narrowing): New function.
>         (vectorizable_call): Restrict internal function handling
>         to NONE and NARROW cases, using simple_integer_narrowing
>         to test for the latter.  Add cost of narrowing operation
>         and insert it where necessary.
>
> gcc/testsuite/
>         PR tree-optimization/68577
>         * gcc.dg/vect/pr68577.c: New test.
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr68577.c b/gcc/testsuite/gcc.dg/vect/pr68577.c
> new file mode 100644
> index 0000000..999c1c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr68577.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +
> +int a, b;
> +
> +void
> +__sched_cpucount (void)
> +{
> +  while (b)
> +    {
> +      long l = b++;
> +      a += __builtin_popcountl(l);
> +    }
> +}
> +
> +void
> +slp_test (int *x, long *y)
> +{
> +  for (int i = 0; i < 512; i += 4)
> +    {
> +      x[i] = __builtin_popcountl(y[i]);
> +      x[i + 1] = __builtin_popcountl(y[i + 1]);
> +      x[i + 2] = __builtin_popcountl(y[i + 2]);
> +      x[i + 3] = __builtin_popcountl(y[i + 3]);
> +    }
> +}
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 3b078da..af86bce 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    return true;
>  }
>
> +/* Return true if vector type VECTYPE_OUT has integer elements and
> +   if we can narrow two integer vectors with the same shape as
> +   VECTYPE_IN to VECTYPE_OUT in a single step.  On success,
> +   return the binary pack code in *CONVERT_CODE and the types
> +   of the input vectors in *CONVERT_FROM.  */
> +
> +static bool
> +simple_integer_narrowing (tree vectype_out, tree vectype_in,
> +                         tree_code *convert_code, tree *convert_from)
> +{
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)))
> +    return false;
> +
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
> +    {
> +      unsigned int bits
> +       = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in)));
> +      tree scalar_type = build_nonstandard_integer_type (bits, 0);
> +      vectype_in = get_same_sized_vectype (scalar_type, vectype_in);
> +    }
> +

any reason for supporting non-integer types on the input?  It seems to me
you are doing this for the lrint case?  If so isn't the "question" wrong and
you should pass the integer type the IFN returns as vectype_in instead?

That said, this conversion doesn't seem to belong to simple_integer_narrowing.

The patch is ok with simply removing it.

Thanks,
Richard.

> +  tree_code code;
> +  int multi_step_cvt = 0;
> +  auto_vec <tree, 8> interm_types;
> +  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
> +                                       &code, &multi_step_cvt,
> +                                       &interm_types)
> +      || multi_step_cvt)
> +    return false;
> +
> +  *convert_code = code;
> +  *convert_from = vectype_in;
> +  return true;
> +}
>
>  /* Function vectorizable_call.
>
> @@ -2288,7 +2322,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    tree callee = gimple_call_fndecl (stmt);
>
>    /* First try using an internal function.  */
> -  if (cfn != CFN_LAST)
> +  tree_code convert_code = ERROR_MARK;
> +  tree convert_from = NULL_TREE;
> +  if (cfn != CFN_LAST
> +      && (modifier == NONE
> +         || (modifier == NARROW
> +             && simple_integer_narrowing (vectype_out, vectype_in,
> +                                          &convert_code, &convert_from))))
>      ifn = vectorizable_internal_function (cfn, callee, vectype_out,
>                                           vectype_in);
>
> @@ -2328,7 +2368,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>
>    if (slp_node || PURE_SLP_STMT (stmt_info))
>      ncopies = 1;
> -  else if (modifier == NARROW)
> +  else if (modifier == NARROW && ifn == IFN_LAST)
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>    else
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -2344,6 +2384,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>          dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
>                           "\n");
>        vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
> +      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
> +       add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
> +                      vec_promote_demote, stmt_info, 0, vect_body);
> +
>        return true;
>      }
>
> @@ -2357,9 +2401,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
>
>    prev_stmt_info = NULL;
> -  switch (modifier)
> +  if (modifier == NONE || ifn != IFN_LAST)
>      {
> -    case NONE:
> +      tree prev_res = NULL_TREE;
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2387,12 +2431,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                       vec<tree> vec_oprndsk = vec_defs[k];
>                       vargs[k] = vec_oprndsk[i];
>                     }
> -                 if (ifn != IFN_LAST)
> -                   new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                 if (modifier == NARROW)
> +                   {
> +                     tree half_res = make_ssa_name (convert_from);
> +                     new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     gimple_call_set_lhs (new_stmt, half_res);
> +                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                     if ((i & 1) == 0)
> +                       {
> +                         prev_res = half_res;
> +                         continue;
> +                       }
> +                     new_temp = make_ssa_name (vec_dest);
> +                     new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                                     prev_res, half_res);
> +                   }
>                   else
> -                   new_stmt = gimple_build_call_vec (fndecl, vargs);
> -                 new_temp = make_ssa_name (vec_dest, new_stmt);
> -                 gimple_call_set_lhs (new_stmt, new_temp);
> +                   {
> +                     if (ifn != IFN_LAST)
> +                       new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     else
> +                       new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                     new_temp = make_ssa_name (vec_dest, new_stmt);
> +                     gimple_call_set_lhs (new_stmt, new_temp);
> +                   }
>                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
>                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>                 }
> @@ -2436,6 +2498,21 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               new_temp = make_ssa_name (vec_dest);
>               new_stmt = gimple_build_assign (new_temp, new_var);
>             }
> +         else if (modifier == NARROW)
> +           {
> +             tree half_res = make_ssa_name (convert_from);
> +             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +             gimple_call_set_lhs (new_stmt, half_res);
> +             vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +             if ((j & 1) == 0)
> +               {
> +                 prev_res = half_res;
> +                 continue;
> +               }
> +             new_temp = make_ssa_name (vec_dest);
> +             new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                             prev_res, half_res);
> +           }
>           else
>             {
>               if (ifn != IFN_LAST)
> @@ -2447,17 +2524,16 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>             }
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
>
> -         if (j == 0)
> +         if (j == (modifier == NARROW ? 1 : 0))
>             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
>           else
>             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
>
>           prev_stmt_info = vinfo_for_stmt (new_stmt);
>         }
> -
> -      break;
> -
> -    case NARROW:
> +    }
> +  else if (modifier == NARROW)
> +    {
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2528,10 +2604,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               vargs.quick_push (vec_oprnd1);
>             }
>
> -         if (ifn != IFN_LAST)
> -           new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> -         else
> -           new_stmt = gimple_build_call_vec (fndecl, vargs);
> +         new_stmt = gimple_build_call_vec (fndecl, vargs);
>           new_temp = make_ssa_name (vec_dest, new_stmt);
>           gimple_call_set_lhs (new_stmt, new_temp);
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
> @@ -2545,13 +2618,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>         }
>
>        *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
> -
> -      break;
> -
> -    case WIDEN:
> -      /* No current target implements this case.  */
> -      return false;
>      }
> +  else
> +    /* No current target implements this case.  */
> +    return false;
>
>    vargs.release ();
>
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PR68577: Handle narrowing for vector popcount, etc.
  2015-12-01 10:53 ` Richard Biener
@ 2015-12-01 21:04   ` Richard Sandiford
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Sandiford @ 2015-12-01 21:04 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Dec 1, 2015 at 10:14 AM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 3b078da..af86bce 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>    return true;
>>  }
>>
>> +/* Return true if vector type VECTYPE_OUT has integer elements and
>> +   if we can narrow two integer vectors with the same shape as
>> +   VECTYPE_IN to VECTYPE_OUT in a single step.  On success,
>> +   return the binary pack code in *CONVERT_CODE and the types
>> +   of the input vectors in *CONVERT_FROM.  */
>> +
>> +static bool
>> +simple_integer_narrowing (tree vectype_out, tree vectype_in,
>> +                         tree_code *convert_code, tree *convert_from)
>> +{
>> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)))
>> +    return false;
>> +
>> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
>> +    {
>> +      unsigned int bits
>> +       = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in)));
>> +      tree scalar_type = build_nonstandard_integer_type (bits, 0);
>> +      vectype_in = get_same_sized_vectype (scalar_type, vectype_in);
>> +    }
>> +
>
> any reason for supporting non-integer types on the input?  It seems to me
> you are doing this for the lrint case?  If so isn't the "question" wrong and
> you should pass the integer type the IFN returns as vectype_in instead?
>
> That said, this conversion doesn't seem to belong to simple_integer_narrowing.
>
> The patch is ok with simply removing it.

OK, thanks, here's what I applied after retesting.

Richard


gcc/
	PR tree-optimization/68577
	* tree-vect-stmts.c (simple_integer_narrowing): New function.
	(vectorizable_call): Restrict internal function handling
	to NONE and NARROW cases, using simple_integer_narrowing
	to test for the latter.  Add cost of narrowing operation
	and insert it where necessary.

gcc/testsuite/
	PR tree-optimization/68577
	* gcc.dg/vect/pr68577.c: New test.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2015-12-01 14:53:39.689404993 +0000
+++ gcc/tree-vect-stmts.c	2015-12-01 20:50:22.288498596 +0000
@@ -2140,6 +2140,31 @@ vectorizable_mask_load_store (gimple *st
   return true;
 }
 
+/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
+   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
+   in a single step.  On success, store the binary pack code in
+   *CONVERT_CODE.  */
+
+static bool
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
+			  tree_code *convert_code)
+{
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
+    return false;
+
+  tree_code code;
+  int multi_step_cvt = 0;
+  auto_vec <tree, 8> interm_types;
+  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+					&code, &multi_step_cvt,
+					&interm_types)
+      || multi_step_cvt)
+    return false;
+
+  *convert_code = code;
+  return true;
+}
 
 /* Function vectorizable_call.
 
@@ -2306,7 +2331,12 @@ vectorizable_call (gimple *gs, gimple_st
   tree callee = gimple_call_fndecl (stmt);
 
   /* First try using an internal function.  */
-  if (cfn != CFN_LAST)
+  tree_code convert_code = ERROR_MARK;
+  if (cfn != CFN_LAST
+      && (modifier == NONE
+	  || (modifier == NARROW
+	      && simple_integer_narrowing (vectype_out, vectype_in,
+					   &convert_code))))
     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
 					  vectype_in);
 
@@ -2346,7 +2376,7 @@ vectorizable_call (gimple *gs, gimple_st
 
   if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
-  else if (modifier == NARROW)
+  else if (modifier == NARROW && ifn == IFN_LAST)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2362,6 +2392,10 @@ vectorizable_call (gimple *gs, gimple_st
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                          "\n");
       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
+      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+	add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
+		       vec_promote_demote, stmt_info, 0, vect_body);
+
       return true;
     }
 
@@ -2375,9 +2409,9 @@ vectorizable_call (gimple *gs, gimple_st
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   prev_stmt_info = NULL;
-  switch (modifier)
+  if (modifier == NONE || ifn != IFN_LAST)
     {
-    case NONE:
+      tree prev_res = NULL_TREE;
       for (j = 0; j < ncopies; ++j)
 	{
 	  /* Build argument list for the vectorized call.  */
@@ -2405,12 +2439,30 @@ vectorizable_call (gimple *gs, gimple_st
 		      vec<tree> vec_oprndsk = vec_defs[k];
 		      vargs[k] = vec_oprndsk[i];
 		    }
-		  if (ifn != IFN_LAST)
-		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		  if (modifier == NARROW)
+		    {
+		      tree half_res = make_ssa_name (vectype_in);
+		      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		      gimple_call_set_lhs (new_stmt, half_res);
+		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		      if ((i & 1) == 0)
+			{
+			  prev_res = half_res;
+			  continue;
+			}
+		      new_temp = make_ssa_name (vec_dest);
+		      new_stmt = gimple_build_assign (new_temp, convert_code,
+						      prev_res, half_res);
+		    }
 		  else
-		    new_stmt = gimple_build_call_vec (fndecl, vargs);
-		  new_temp = make_ssa_name (vec_dest, new_stmt);
-		  gimple_call_set_lhs (new_stmt, new_temp);
+		    {
+		      if (ifn != IFN_LAST)
+			new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		      else
+			new_stmt = gimple_build_call_vec (fndecl, vargs);
+		      new_temp = make_ssa_name (vec_dest, new_stmt);
+		      gimple_call_set_lhs (new_stmt, new_temp);
+		    }
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
 		}
@@ -2454,6 +2506,21 @@ vectorizable_call (gimple *gs, gimple_st
 	      new_temp = make_ssa_name (vec_dest);
 	      new_stmt = gimple_build_assign (new_temp, new_var);
 	    }
+	  else if (modifier == NARROW)
+	    {
+	      tree half_res = make_ssa_name (vectype_in);
+	      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+	      gimple_call_set_lhs (new_stmt, half_res);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      if ((j & 1) == 0)
+		{
+		  prev_res = half_res;
+		  continue;
+		}
+	      new_temp = make_ssa_name (vec_dest);
+	      new_stmt = gimple_build_assign (new_temp, convert_code,
+					      prev_res, half_res);
+	    }
 	  else
 	    {
 	      if (ifn != IFN_LAST)
@@ -2465,17 +2532,16 @@ vectorizable_call (gimple *gs, gimple_st
 	    }
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-	  if (j == 0)
+	  if (j == (modifier == NARROW ? 1 : 0))
 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
 	  else
 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
 
 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
 	}
-
-      break;
-
-    case NARROW:
+    }
+  else if (modifier == NARROW)
+    {
       for (j = 0; j < ncopies; ++j)
 	{
 	  /* Build argument list for the vectorized call.  */
@@ -2546,10 +2612,7 @@ vectorizable_call (gimple *gs, gimple_st
 	      vargs.quick_push (vec_oprnd1);
 	    }
 
-	  if (ifn != IFN_LAST)
-	    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
-	  else
-	    new_stmt = gimple_build_call_vec (fndecl, vargs);
+	  new_stmt = gimple_build_call_vec (fndecl, vargs);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
 	  gimple_call_set_lhs (new_stmt, new_temp);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2563,13 +2626,10 @@ vectorizable_call (gimple *gs, gimple_st
 	}
 
       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-
-      break;
-
-    case WIDEN:
-      /* No current target implements this case.  */
-      return false;
     }
+  else
+    /* No current target implements this case.  */
+    return false;
 
   vargs.release ();
 
Index: gcc/testsuite/gcc.dg/vect/pr68577.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/vect/pr68577.c	2015-12-01 20:50:22.284498641 +0000
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+
+int a, b;
+
+void
+__sched_cpucount (void)
+{
+  while (b)
+    {
+      long l = b++;
+      a += __builtin_popcountl(l);
+    }
+}
+
+void
+slp_test (int *x, long *y)
+{
+  for (int i = 0; i < 512; i += 4)
+    {
+      x[i] = __builtin_popcountl(y[i]);
+      x[i + 1] = __builtin_popcountl(y[i + 1]);
+      x[i + 2] = __builtin_popcountl(y[i + 2]);
+      x[i + 3] = __builtin_popcountl(y[i + 3]);
+    }
+}

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-12-01 21:04 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-01  9:14 PR68577: Handle narrowing for vector popcount, etc Richard Sandiford
2015-12-01 10:53 ` Richard Biener
2015-12-01 21:04   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).