* [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin.
@ 2021-06-17 6:29 liuhongt
2021-06-21 10:05 ` Richard Biener
0 siblings, 1 reply; 4+ messages in thread
From: liuhongt @ 2021-06-17 6:29 UTC (permalink / raw)
To: gcc-patches, richard.guenther
The patch removes those pro- and demotions when the backend supports the
direct optab.
For i386, it enables vectorization for vpopcntb/vpopcntw and optimizes
the generated code for vpopcntq.
gcc/ChangeLog:
PR tree-optimization/97770
* tree-vect-patterns.c (vect_recog_popcount_pattern):
New.
(vect_recog_func vect_vect_recog_func_ptrs): Add new pattern.
gcc/testsuite/ChangeLog:
PR tree-optimization/97770
* gcc.target/i386/avx512bitalg-pr97770-1.c: Remove xfail.
* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Remove xfail.
---
.../gcc.target/i386/avx512bitalg-pr97770-1.c | 27 +++--
.../i386/avx512vpopcntdq-pr97770-1.c | 9 +-
gcc/tree-vect-patterns.c | 110 ++++++++++++++++++
3 files changed, 127 insertions(+), 19 deletions(-)
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
index c83a477045c..d1beec4cdb4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
@@ -1,19 +1,18 @@
/* PR target/97770 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
-/* Add xfail since no IFN for QI/HImode popcount */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
#include <immintrin.h>
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_128 (char * __restrict dest, char* src)
+popcountb_128 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -21,7 +20,7 @@ popcountb_128 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_128 (short* __restrict dest, short* src)
+popcountw_128 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 8; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -29,7 +28,7 @@ popcountw_128 (short* __restrict dest, short* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_256 (char * __restrict dest, char* src)
+popcountb_256 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -37,7 +36,7 @@ popcountb_256 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_256 (short* __restrict dest, short* src)
+popcountw_256 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -45,7 +44,7 @@ popcountw_256 (short* __restrict dest, short* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_512 (char * __restrict dest, char* src)
+popcountb_512 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 64; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -53,7 +52,7 @@ popcountb_512 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_512 (short* __restrict dest, short* src)
+popcountw_512 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
index 63bb00d9b4a..dedd2e4c3d6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
@@ -1,13 +1,12 @@
/* PR target/97770 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
+/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
-/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
#ifndef AVX512VPOPCNTQ_H_INCLUDED
#define AVX512VPOPCNTQ_H_INCLUDED
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 177d44ebb5e..5c80800efbb 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1292,6 +1292,115 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
"vect_recog_widen_minus_pattern");
}
+/* Function vect_recog_popcount_pattern
+
+ Try to find the following pattern:
+
+ UTYPE1 A;
+ TYPE1 B;
+ UTYPE2 temp_in;
+ TYPE3 temp_out;
+ temp_in = (TYPE2)A;
+
+ temp_out = __builtin_popcount{,l,ll} (temp_in);
+ B = (TYPE1) temp_out;
+
+ TYPE2 may or may not be equal to TYPE3.
+ i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
+ i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
+
+ Input:
+
+ * STMT_VINFO: The stmt from which the pattern search begins.
+ here it starts with B = (TYPE1) temp_out;
+
+ Output:
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ B = .POPCOUNT (A);
+*/
+
+static gimple *
+vect_recog_popcount_pattern (vec_info *vinfo,
+ stmt_vec_info stmt_vinfo, tree *type_out)
+{
+ gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
+ gimple *popcount_stmt, *pattern_stmt;
+ tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
+ auto_vec<tree> vargs;
+
+ /* Find B = (TYPE1) temp_out. */
+ if (!last_stmt)
+ return NULL;
+ tree_code code = gimple_assign_rhs_code (last_stmt);
+ if (!CONVERT_EXPR_CODE_P (code))
+ return NULL;
+
+ lhs_oprnd = gimple_assign_lhs (last_stmt);
+ lhs_type = TREE_TYPE (lhs_oprnd);
+ if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+ return NULL;
+
+ rhs_oprnd = gimple_assign_rhs1 (last_stmt);
+ if (TREE_CODE (rhs_oprnd) != SSA_NAME
+ || !has_single_use (rhs_oprnd))
+ return NULL;
+ popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
+
+ /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
+ if (!is_gimple_call (popcount_stmt)
+ || !gimple_call_lhs (popcount_stmt))
+ return NULL;
+ switch (gimple_call_combined_fn (popcount_stmt))
+ {
+ CASE_CFN_POPCOUNT:
+ break;
+ default:
+ return NULL;
+ }
+
+ rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
+ vect_unpromoted_value unprom_diff;
+ rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
+ &unprom_diff);
+
+ if (!rhs_origin)
+ return NULL;
+
+ /* Input and output of .POPCOUNT should be same-precision integers.
+ Also, A should be unsigned or have the same precision as temp_in;
+ otherwise there would be a sign extension from A to temp_in. */
+ if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
+ || !(TYPE_UNSIGNED (unprom_diff.type)
+ || (TYPE_PRECISION (unprom_diff.type)
+ == TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
+ return NULL;
+ vargs.safe_push (unprom_diff.op);
+
+ vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
+ vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
+ /* Do it only if the backend provides popcount<vector_mode>2. */
+ if (!direct_internal_fn_supported_p (IFN_POPCOUNT,
+ vec_type,
+ OPTIMIZE_FOR_SPEED))
+ return NULL;
+
+ /* Create B = .POPCOUNT (A). */
+ new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
+ pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
+ gimple_call_set_lhs (pattern_stmt, new_var);
+ gimple_set_location (pattern_stmt, gimple_location (last_stmt));
+ *type_out = vec_type;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: %G", pattern_stmt);
+ return pattern_stmt;
+}
+
/* Function vect_recog_pow_pattern
Try to find the following pattern:
@@ -5283,6 +5392,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_sad_pattern, "sad" },
{ vect_recog_widen_sum_pattern, "widen_sum" },
{ vect_recog_pow_pattern, "pow" },
+ { vect_recog_popcount_pattern, "popcount" },
{ vect_recog_widen_shift_pattern, "widen_shift" },
{ vect_recog_rotate_pattern, "rotate" },
{ vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin.
2021-06-17 6:29 [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin liuhongt
@ 2021-06-21 10:05 ` Richard Biener
2021-06-22 2:43 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2021-06-21 10:05 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, Hongtao Liu, H. J. Lu
On Thu, Jun 17, 2021 at 8:29 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> The patch remove those pro- and demotions when backend support direct
> optab.
>
> For i386: it enables vectorization for vpopcntb/vpopcntw and optimized
> for vpopcntq.
>
> gcc/ChangeLog:
>
> PR tree-optimization/97770
> * tree-vect-patterns.c (vect_recog_popcount_pattern):
> New.
> (vect_recog_func vect_vect_recog_func_ptrs): Add new pattern.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/97770
> * gcc.target/i386/avx512bitalg-pr97770-1.c: Remove xfail.
> * gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Remove xfail.
> ---
> .../gcc.target/i386/avx512bitalg-pr97770-1.c | 27 +++--
> .../i386/avx512vpopcntdq-pr97770-1.c | 9 +-
> gcc/tree-vect-patterns.c | 110 ++++++++++++++++++
> 3 files changed, 127 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> index c83a477045c..d1beec4cdb4 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> @@ -1,19 +1,18 @@
> /* PR target/97770 */
> /* { dg-do compile } */
> -/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
> -/* Add xfail since no IFN for QI/HImode popcount */
> -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
>
> #include <immintrin.h>
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountb_128 (char * __restrict dest, char* src)
> +popcountb_128 (unsigned char * __restrict dest, unsigned char* src)
> {
> for (int i = 0; i != 16; i++)
> dest[i] = __builtin_popcount (src[i]);
> @@ -21,7 +20,7 @@ popcountb_128 (char * __restrict dest, char* src)
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountw_128 (short* __restrict dest, short* src)
> +popcountw_128 (unsigned short* __restrict dest, unsigned short* src)
> {
> for (int i = 0; i != 8; i++)
> dest[i] = __builtin_popcount (src[i]);
> @@ -29,7 +28,7 @@ popcountw_128 (short* __restrict dest, short* src)
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountb_256 (char * __restrict dest, char* src)
> +popcountb_256 (unsigned char * __restrict dest, unsigned char* src)
> {
> for (int i = 0; i != 32; i++)
> dest[i] = __builtin_popcount (src[i]);
> @@ -37,7 +36,7 @@ popcountb_256 (char * __restrict dest, char* src)
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountw_256 (short* __restrict dest, short* src)
> +popcountw_256 (unsigned short* __restrict dest, unsigned short* src)
> {
> for (int i = 0; i != 16; i++)
> dest[i] = __builtin_popcount (src[i]);
> @@ -45,7 +44,7 @@ popcountw_256 (short* __restrict dest, short* src)
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountb_512 (char * __restrict dest, char* src)
> +popcountb_512 (unsigned char * __restrict dest, unsigned char* src)
> {
> for (int i = 0; i != 64; i++)
> dest[i] = __builtin_popcount (src[i]);
> @@ -53,7 +52,7 @@ popcountb_512 (char * __restrict dest, char* src)
>
> void
> __attribute__ ((noipa, optimize("-O3")))
> -popcountw_512 (short* __restrict dest, short* src)
> +popcountw_512 (unsigned short* __restrict dest, unsigned short* src)
> {
> for (int i = 0; i != 32; i++)
> dest[i] = __builtin_popcount (src[i]);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> index 63bb00d9b4a..dedd2e4c3d6 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> @@ -1,13 +1,12 @@
> /* PR target/97770 */
> /* { dg-do compile } */
> -/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
> +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> -/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
> -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
> -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
> -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> #ifndef AVX512VPOPCNTQ_H_INCLUDED
> #define AVX512VPOPCNTQ_H_INCLUDED
>
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 177d44ebb5e..5c80800efbb 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -1292,6 +1292,115 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
> "vect_recog_widen_minus_pattern");
> }
>
> +/* Function vect_recog_popcount_pattern
> +
> + Try to find the following pattern:
> +
> + UTYPE1 A;
> + TYPE1 B;
> + UTYPE2 temp_in;
> + TYPE3 temp_out;
> + temp_in = (TYPE2)A;
> +
> + temp_out = __builtin_popcount{,l,ll} (temp_in);
> + B = (TYPE1) temp_out;
> +
> + TYPE2 may or may not be equal to TYPE3.
> + i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
> + i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
> +
> + Input:
> +
> + * STMT_VINFO: The stmt from which the pattern search begins.
> + here it starts with B = (TYPE1) temp_out;
> +
> + Output:
> +
> + * TYPE_OUT: The vector type of the output of this pattern.
> +
> + * Return value: A new stmt that will be used to replace the sequence of
> + stmts that constitute the pattern. In this case it will be:
> + B = .POPCOUNT (A);
> +*/
> +
> +static gimple *
> +vect_recog_popcount_pattern (vec_info *vinfo,
> + stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> + gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
> + gimple *popcount_stmt, *pattern_stmt;
> + tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
> + auto_vec<tree> vargs;
> +
> + /* Find B = (TYPE1) temp_out. */
> + if (!last_stmt)
> + return NULL;
> + tree_code code = gimple_assign_rhs_code (last_stmt);
> + if (!CONVERT_EXPR_CODE_P (code))
> + return NULL;
> +
> + lhs_oprnd = gimple_assign_lhs (last_stmt);
> + lhs_type = TREE_TYPE (lhs_oprnd);
> + if (TREE_CODE (lhs_type) != INTEGER_TYPE)
> + return NULL;
INTEGRAL_TYPE_P
> + rhs_oprnd = gimple_assign_rhs1 (last_stmt);
> + if (TREE_CODE (rhs_oprnd) != SSA_NAME
> + || !has_single_use (rhs_oprnd))
> + return NULL;
> + popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
> +
> + /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
> + if (!is_gimple_call (popcount_stmt)
> + || !gimple_call_lhs (popcount_stmt))
Since you're arriving here via use-def chain the LHS will
never be NULL.
> + return NULL;
> + switch (gimple_call_combined_fn (popcount_stmt))
> + {
> + CASE_CFN_POPCOUNT:
> + break;
> + default:
> + return NULL;
> + }
> +
for safety:
if (gimple_call_num_args (popcount_stmt) != 1)
return NULL;
> + rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
> + vect_unpromoted_value unprom_diff;
> + rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
> + &unprom_diff);
> +
> + if (!rhs_origin)
> + return NULL;
> +
> + /* Input and outout of .POPCOUNT should be same-precision integer.
> + Also A should be unsigned or same presion as temp_in,
> + otherwise there would be sign_extend from A to temp_in. */
> + if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
> + || !(TYPE_UNSIGNED (unprom_diff.type)
> + || (TYPE_PRECISION (unprom_diff.type)
> + == TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
Note that I find an if (A || !(B || C)) hard to read; please write
if (A || (!B && !C)) instead.
OK otherwise.
Thanks,
Richard.
> + return NULL;
> + vargs.safe_push (unprom_diff.op);
> +
> + vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
> + vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
> + /* Do it only the backend existed popcount<vector_mode>2. */
> + if (!direct_internal_fn_supported_p (IFN_POPCOUNT,
> + vec_type,
> + OPTIMIZE_FOR_SPEED))
> + return NULL;
> +
> + /* Create B = .POPCOUNT (A). */
> + new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> + pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
> + gimple_call_set_lhs (pattern_stmt, new_var);
> + gimple_set_location (pattern_stmt, gimple_location (last_stmt));
> + *type_out = vec_type;
> +
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "created pattern stmt: %G", pattern_stmt);
> + return pattern_stmt;
> +}
> +
> /* Function vect_recog_pow_pattern
>
> Try to find the following pattern:
> @@ -5283,6 +5392,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> { vect_recog_sad_pattern, "sad" },
> { vect_recog_widen_sum_pattern, "widen_sum" },
> { vect_recog_pow_pattern, "pow" },
> + { vect_recog_popcount_pattern, "popcount" },
> { vect_recog_widen_shift_pattern, "widen_shift" },
> { vect_recog_rotate_pattern, "rotate" },
> { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
> --
> 2.18.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin.
2021-06-21 10:05 ` Richard Biener
@ 2021-06-22 2:43 ` Hongtao Liu
2021-06-22 2:44 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Hongtao Liu @ 2021-06-22 2:43 UTC (permalink / raw)
To: Richard Biener; +Cc: liuhongt, GCC Patches, H. J. Lu
[-- Attachment #1: Type: text/plain, Size: 11795 bytes --]
On Mon, Jun 21, 2021 at 6:05 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Jun 17, 2021 at 8:29 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > The patch remove those pro- and demotions when backend support direct
> > optab.
> >
> > For i386: it enables vectorization for vpopcntb/vpopcntw and optimized
> > for vpopcntq.
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/97770
> > * tree-vect-patterns.c (vect_recog_popcount_pattern):
> > New.
> > (vect_recog_func vect_vect_recog_func_ptrs): Add new pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR tree-optimization/97770
> > * gcc.target/i386/avx512bitalg-pr97770-1.c: Remove xfail.
> > * gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Remove xfail.
> > ---
> > .../gcc.target/i386/avx512bitalg-pr97770-1.c | 27 +++--
> > .../i386/avx512vpopcntdq-pr97770-1.c | 9 +-
> > gcc/tree-vect-patterns.c | 110 ++++++++++++++++++
> > 3 files changed, 127 insertions(+), 19 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > index c83a477045c..d1beec4cdb4 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > @@ -1,19 +1,18 @@
> > /* PR target/97770 */
> > /* { dg-do compile } */
> > -/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
> > -/* Add xfail since no IFN for QI/HImode popcount */
> > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> > +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> >
> > #include <immintrin.h>
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountb_128 (char * __restrict dest, char* src)
> > +popcountb_128 (unsigned char * __restrict dest, unsigned char* src)
> > {
> > for (int i = 0; i != 16; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > @@ -21,7 +20,7 @@ popcountb_128 (char * __restrict dest, char* src)
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountw_128 (short* __restrict dest, short* src)
> > +popcountw_128 (unsigned short* __restrict dest, unsigned short* src)
> > {
> > for (int i = 0; i != 8; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > @@ -29,7 +28,7 @@ popcountw_128 (short* __restrict dest, short* src)
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountb_256 (char * __restrict dest, char* src)
> > +popcountb_256 (unsigned char * __restrict dest, unsigned char* src)
> > {
> > for (int i = 0; i != 32; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > @@ -37,7 +36,7 @@ popcountb_256 (char * __restrict dest, char* src)
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountw_256 (short* __restrict dest, short* src)
> > +popcountw_256 (unsigned short* __restrict dest, unsigned short* src)
> > {
> > for (int i = 0; i != 16; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > @@ -45,7 +44,7 @@ popcountw_256 (short* __restrict dest, short* src)
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountb_512 (char * __restrict dest, char* src)
> > +popcountb_512 (unsigned char * __restrict dest, unsigned char* src)
> > {
> > for (int i = 0; i != 64; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > @@ -53,7 +52,7 @@ popcountb_512 (char * __restrict dest, char* src)
> >
> > void
> > __attribute__ ((noipa, optimize("-O3")))
> > -popcountw_512 (short* __restrict dest, short* src)
> > +popcountw_512 (unsigned short* __restrict dest, unsigned short* src)
> > {
> > for (int i = 0; i != 32; i++)
> > dest[i] = __builtin_popcount (src[i]);
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > index 63bb00d9b4a..dedd2e4c3d6 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > @@ -1,13 +1,12 @@
> > /* PR target/97770 */
> > /* { dg-do compile } */
> > -/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
> > +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > -/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
> > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
> > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
> > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
> > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > #ifndef AVX512VPOPCNTQ_H_INCLUDED
> > #define AVX512VPOPCNTQ_H_INCLUDED
> >
> > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> > index 177d44ebb5e..5c80800efbb 100644
> > --- a/gcc/tree-vect-patterns.c
> > +++ b/gcc/tree-vect-patterns.c
> > @@ -1292,6 +1292,115 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
> > "vect_recog_widen_minus_pattern");
> > }
> >
> > +/* Function vect_recog_popcount_pattern
> > +
> > + Try to find the following pattern:
> > +
> > + UTYPE1 A;
> > + TYPE1 B;
> > + UTYPE2 temp_in;
> > + TYPE3 temp_out;
> > + temp_in = (TYPE2)A;
> > +
> > + temp_out = __builtin_popcount{,l,ll} (temp_in);
> > + B = (TYPE1) temp_out;
> > +
> > + TYPE2 may or may not be equal to TYPE3.
> > + i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
> > + i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
> > +
> > + Input:
> > +
> > + * STMT_VINFO: The stmt from which the pattern search begins.
> > + here it starts with B = (TYPE1) temp_out;
> > +
> > + Output:
> > +
> > + * TYPE_OUT: The vector type of the output of this pattern.
> > +
> > + * Return value: A new stmt that will be used to replace the sequence of
> > + stmts that constitute the pattern. In this case it will be:
> > + B = .POPCOUNT (A);
> > +*/
> > +
> > +static gimple *
> > +vect_recog_popcount_pattern (vec_info *vinfo,
> > + stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > + gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
> > + gimple *popcount_stmt, *pattern_stmt;
> > + tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
> > + auto_vec<tree> vargs;
> > +
> > + /* Find B = (TYPE1) temp_out. */
> > + if (!last_stmt)
> > + return NULL;
> > + tree_code code = gimple_assign_rhs_code (last_stmt);
> > + if (!CONVERT_EXPR_CODE_P (code))
> > + return NULL;
> > +
> > + lhs_oprnd = gimple_assign_lhs (last_stmt);
> > + lhs_type = TREE_TYPE (lhs_oprnd);
> > + if (TREE_CODE (lhs_type) != INTEGER_TYPE)
> > + return NULL;
>
> INTEGRAL_TYPE_P
>
Changed.
> > + rhs_oprnd = gimple_assign_rhs1 (last_stmt);
> > + if (TREE_CODE (rhs_oprnd) != SSA_NAME
> > + || !has_single_use (rhs_oprnd))
> > + return NULL;
> > + popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
> > +
> > + /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
> > + if (!is_gimple_call (popcount_stmt)
> > + || !gimple_call_lhs (popcount_stmt))
>
> Since you're arriving here via use-def chain the LHS will
> never be NULL.
>
> > + return NULL;
> > + switch (gimple_call_combined_fn (popcount_stmt))
> > + {
> > + CASE_CFN_POPCOUNT:
> > + break;
> > + default:
> > + return NULL;
> > + }
> > +
>
> for safety:
>
> if (gimple_call_num_args (popcount_stmt) != 1)
> return NULL;
>
Changed.
> > + rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
> > + vect_unpromoted_value unprom_diff;
> > + rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
> > + &unprom_diff);
> > +
> > + if (!rhs_origin)
> > + return NULL;
> > +
> > + /* Input and outout of .POPCOUNT should be same-precision integer.
> > + Also A should be unsigned or same presion as temp_in,
> > + otherwise there would be sign_extend from A to temp_in. */
> > + if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
> > + || !(TYPE_UNSIGNED (unprom_diff.type)
> > + || (TYPE_PRECISION (unprom_diff.type)
> > + == TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
>
> Note I find a if (A || !(B || C)) hard to read, please write if (A ||
> (!B && !C)) instead.
>
Changed.
> OK otherwise.
>
> Thanks,
> Richard.
>
> > + return NULL;
> > + vargs.safe_push (unprom_diff.op);
> > +
> > + vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
> > + vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
> > + /* Do it only the backend existed popcount<vector_mode>2. */
> > + if (!direct_internal_fn_supported_p (IFN_POPCOUNT,
> > + vec_type,
> > + OPTIMIZE_FOR_SPEED))
> > + return NULL;
> > +
> > + /* Create B = .POPCOUNT (A). */
> > + new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> > + pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
> > + gimple_call_set_lhs (pattern_stmt, new_var);
> > + gimple_set_location (pattern_stmt, gimple_location (last_stmt));
> > + *type_out = vec_type;
> > +
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_NOTE, vect_location,
> > + "created pattern stmt: %G", pattern_stmt);
> > + return pattern_stmt;
> > +}
> > +
> > /* Function vect_recog_pow_pattern
> >
> > Try to find the following pattern:
> > @@ -5283,6 +5392,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> > { vect_recog_sad_pattern, "sad" },
> > { vect_recog_widen_sum_pattern, "widen_sum" },
> > { vect_recog_pow_pattern, "pow" },
> > + { vect_recog_popcount_pattern, "popcount" },
> > { vect_recog_widen_shift_pattern, "widen_shift" },
> > { vect_recog_rotate_pattern, "rotate" },
> > { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
> > --
> > 2.18.1
> >
Thanks for the review, here is the patch I'm checking in.
--
BR,
Hongtao
[-- Attachment #2: 0001-Add-vect_recog_popcount_pattern-to-handle-mismatch-b_V2.patch --]
[-- Type: text/x-patch, Size: 10293 bytes --]
From dc8c51031bb38d04ffcb52a36aaeab471ee2ad0c Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 16 Jun 2021 17:34:43 +0800
Subject: [PATCH] Add vect_recog_popcount_pattern to handle mismatch between
the vectorized popcount IFN and scalar popcount builtin.
The patch removes those pro- and demotions when the backend supports the
direct optab.
For i386, it enables vectorization for vpopcntb/vpopcntw and optimizes
the generated code for vpopcntq.
gcc/ChangeLog:
PR tree-optimization/97770
* tree-vect-patterns.c (vect_recog_popcount_pattern):
New.
(vect_recog_func vect_vect_recog_func_ptrs): Add new pattern.
gcc/testsuite/ChangeLog:
PR tree-optimization/97770
* gcc.target/i386/avx512bitalg-pr97770-1.c: Remove xfail.
* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Remove xfail.
---
.../gcc.target/i386/avx512bitalg-pr97770-1.c | 27 ++---
.../i386/avx512vpopcntdq-pr97770-1.c | 9 +-
gcc/tree-vect-patterns.c | 112 ++++++++++++++++++
3 files changed, 129 insertions(+), 19 deletions(-)
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
index c83a477045c..d1beec4cdb4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
@@ -1,19 +1,18 @@
/* PR target/97770 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
-/* Add xfail since no IFN for QI/HImode popcount */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
-/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
#include <immintrin.h>
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_128 (char * __restrict dest, char* src)
+popcountb_128 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -21,7 +20,7 @@ popcountb_128 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_128 (short* __restrict dest, short* src)
+popcountw_128 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 8; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -29,7 +28,7 @@ popcountw_128 (short* __restrict dest, short* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_256 (char * __restrict dest, char* src)
+popcountb_256 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -37,7 +36,7 @@ popcountb_256 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_256 (short* __restrict dest, short* src)
+popcountw_256 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -45,7 +44,7 @@ popcountw_256 (short* __restrict dest, short* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountb_512 (char * __restrict dest, char* src)
+popcountb_512 (unsigned char * __restrict dest, unsigned char* src)
{
for (int i = 0; i != 64; i++)
dest[i] = __builtin_popcount (src[i]);
@@ -53,7 +52,7 @@ popcountb_512 (char * __restrict dest, char* src)
void
__attribute__ ((noipa, optimize("-O3")))
-popcountw_512 (short* __restrict dest, short* src)
+popcountw_512 (unsigned short* __restrict dest, unsigned short* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
index 63bb00d9b4a..dedd2e4c3d6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
@@ -1,13 +1,12 @@
/* PR target/97770 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
+/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
-/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
#ifndef AVX512VPOPCNTQ_H_INCLUDED
#define AVX512VPOPCNTQ_H_INCLUDED
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 177d44ebb5e..59727056dc7 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1292,6 +1292,117 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
"vect_recog_widen_minus_pattern");
}
+/* Function vect_recog_popcount_pattern
+
+   Try to find the following pattern:
+
+   UTYPE1 A;
+   TYPE1 B;
+   UTYPE2 temp_in;
+   TYPE3 temp_out;
+   temp_in = (UTYPE2)A;
+
+   temp_out = __builtin_popcount{,l,ll} (temp_in);
+   B = (TYPE1) temp_out;
+
+   UTYPE2 and TYPE3 need not have the same precision: e.g. they do for
+   __builtin_popcount but typically not for __builtin_popcountll.  A may
+   also be signed when it already has the precision of temp_in.
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins.
+   here it starts with B = (TYPE1) temp_out;
+
+   Output:
+
+   * TYPE_OUT: The vector type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the sequence of
+   stmts that constitute the pattern, or NULL if nothing matched.  Here:
+        B = .POPCOUNT (A);
+*/
+
+static gimple *
+vect_recog_popcount_pattern (vec_info *vinfo,
+                             stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
+  gimple *popcount_stmt, *pattern_stmt;
+  tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
+  auto_vec<tree> vargs;
+
+  /* Find B = (TYPE1) temp_out.  */
+  if (!last_stmt)
+    return NULL;
+  tree_code code = gimple_assign_rhs_code (last_stmt);
+  if (!CONVERT_EXPR_CODE_P (code))
+    return NULL;
+
+  lhs_oprnd = gimple_assign_lhs (last_stmt);
+  lhs_type = TREE_TYPE (lhs_oprnd);
+  if (!INTEGRAL_TYPE_P (lhs_type))
+    return NULL;
+
+  rhs_oprnd = gimple_assign_rhs1 (last_stmt);
+  if (TREE_CODE (rhs_oprnd) != SSA_NAME
+      || !has_single_use (rhs_oprnd))
+    return NULL;
+  popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
+
+  /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
+  if (!is_gimple_call (popcount_stmt))
+    return NULL;
+  switch (gimple_call_combined_fn (popcount_stmt))
+    {
+    CASE_CFN_POPCOUNT:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (gimple_call_num_args (popcount_stmt) != 1)
+    return NULL;
+
+  rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
+  vect_unpromoted_value unprom_diff;
+  rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
+                                                     &unprom_diff);
+
+  if (!rhs_origin)
+    return NULL;
+
+  /* Input and output of .POPCOUNT should be same-precision integers.
+     Also A should be unsigned or have the same precision as temp_in,
+     otherwise there would be a sign_extend from A to temp_in.  */
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
+      || (!TYPE_UNSIGNED (unprom_diff.type)
+          && (TYPE_PRECISION (unprom_diff.type)
+              != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
+    return NULL;
+  vargs.safe_push (unprom_diff.op);
+
+  vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
+  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
+  /* Only if a vector type exists and the target has popcount<mode>2.  */
+  if (!vec_type
+      || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
+                                          OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  /* Create B = .POPCOUNT (A).  */
+  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
+  pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
+  gimple_call_set_lhs (pattern_stmt, new_var);
+  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
+  *type_out = vec_type;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "created pattern stmt: %G", pattern_stmt);
+  return pattern_stmt;
+}
+
/* Function vect_recog_pow_pattern
Try to find the following pattern:
@@ -5283,6 +5394,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_sad_pattern, "sad" },
{ vect_recog_widen_sum_pattern, "widen_sum" },
{ vect_recog_pow_pattern, "pow" },
+ { vect_recog_popcount_pattern, "popcount" },
{ vect_recog_widen_shift_pattern, "widen_shift" },
{ vect_recog_rotate_pattern, "rotate" },
{ vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin.
2021-06-22 2:43 ` Hongtao Liu
@ 2021-06-22 2:44 ` Hongtao Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2021-06-22 2:44 UTC (permalink / raw)
To: Richard Biener; +Cc: liuhongt, GCC Patches, H. J. Lu
On Tue, Jun 22, 2021 at 10:43 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Jun 21, 2021 at 6:05 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Thu, Jun 17, 2021 at 8:29 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > The patch remove those pro- and demotions when backend support direct
> > > optab.
> > >
> > > For i386: it enables vectorization for vpopcntb/vpopcntw and optimized
> > > for vpopcntq.
> > >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/97770
> > > * tree-vect-patterns.c (vect_recog_popcount_pattern):
> > > New.
> > > (vect_recog_func vect_vect_recog_func_ptrs): Add new pattern.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR tree-optimization/97770
> > > * gcc.target/i386/avx512bitalg-pr97770-1.c: Remove xfail.
> > > * gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Remove xfail.
> > > ---
> > > .../gcc.target/i386/avx512bitalg-pr97770-1.c | 27 +++--
> > > .../i386/avx512vpopcntdq-pr97770-1.c | 9 +-
> > > gcc/tree-vect-patterns.c | 110 ++++++++++++++++++
> > > 3 files changed, 127 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > > index c83a477045c..d1beec4cdb4 100644
> > > --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
> > > @@ -1,19 +1,18 @@
> > > /* PR target/97770 */
> > > /* { dg-do compile } */
> > > -/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
> > > -/* Add xfail since no IFN for QI/HImode popcount */
> > > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
> > > +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> > > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > >
> > > #include <immintrin.h>
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountb_128 (char * __restrict dest, char* src)
> > > +popcountb_128 (unsigned char * __restrict dest, unsigned char* src)
> > > {
> > > for (int i = 0; i != 16; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > @@ -21,7 +20,7 @@ popcountb_128 (char * __restrict dest, char* src)
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountw_128 (short* __restrict dest, short* src)
> > > +popcountw_128 (unsigned short* __restrict dest, unsigned short* src)
> > > {
> > > for (int i = 0; i != 8; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > @@ -29,7 +28,7 @@ popcountw_128 (short* __restrict dest, short* src)
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountb_256 (char * __restrict dest, char* src)
> > > +popcountb_256 (unsigned char * __restrict dest, unsigned char* src)
> > > {
> > > for (int i = 0; i != 32; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > @@ -37,7 +36,7 @@ popcountb_256 (char * __restrict dest, char* src)
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountw_256 (short* __restrict dest, short* src)
> > > +popcountw_256 (unsigned short* __restrict dest, unsigned short* src)
> > > {
> > > for (int i = 0; i != 16; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > @@ -45,7 +44,7 @@ popcountw_256 (short* __restrict dest, short* src)
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountb_512 (char * __restrict dest, char* src)
> > > +popcountb_512 (unsigned char * __restrict dest, unsigned char* src)
> > > {
> > > for (int i = 0; i != 64; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > @@ -53,7 +52,7 @@ popcountb_512 (char * __restrict dest, char* src)
> > >
> > > void
> > > __attribute__ ((noipa, optimize("-O3")))
> > > -popcountw_512 (short* __restrict dest, short* src)
> > > +popcountw_512 (unsigned short* __restrict dest, unsigned short* src)
> > > {
> > > for (int i = 0; i != 32; i++)
> > > dest[i] = __builtin_popcount (src[i]);
> > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > > index 63bb00d9b4a..dedd2e4c3d6 100644
> > > --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
> > > @@ -1,13 +1,12 @@
> > > /* PR target/97770 */
> > > /* { dg-do compile } */
> > > -/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
> > > +/* { dg-options "-O2 -march=icelake-server -mprefer-vector-width=512" } */
> > > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > > /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > > -/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
> > > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
> > > -/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
> > > #ifndef AVX512VPOPCNTQ_H_INCLUDED
> > > #define AVX512VPOPCNTQ_H_INCLUDED
> > >
> > > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> > > index 177d44ebb5e..5c80800efbb 100644
> > > --- a/gcc/tree-vect-patterns.c
> > > +++ b/gcc/tree-vect-patterns.c
> > > @@ -1292,6 +1292,115 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
> > > "vect_recog_widen_minus_pattern");
> > > }
> > >
> > > +/* Function vect_recog_popcount_pattern
> > > +
> > > + Try to find the following pattern:
> > > +
> > > + UTYPE1 A;
> > > + TYPE1 B;
> > > + UTYPE2 temp_in;
> > > + TYPE3 temp_out;
> > > + temp_in = (TYPE2)A;
> > > +
> > > + temp_out = __builtin_popcount{,l,ll} (temp_in);
> > > + B = (TYPE1) temp_out;
> > > +
> > > + TYPE2 may or may not be equal to TYPE3.
> > > + i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
> > > + i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
> > > +
> > > + Input:
> > > +
> > > + * STMT_VINFO: The stmt from which the pattern search begins.
> > > + here it starts with B = (TYPE1) temp_out;
> > > +
> > > + Output:
> > > +
> > > + * TYPE_OUT: The vector type of the output of this pattern.
> > > +
> > > + * Return value: A new stmt that will be used to replace the sequence of
> > > + stmts that constitute the pattern. In this case it will be:
> > > + B = .POPCOUNT (A);
> > > +*/
> > > +
> > > +static gimple *
> > > +vect_recog_popcount_pattern (vec_info *vinfo,
> > > + stmt_vec_info stmt_vinfo, tree *type_out)
> > > +{
> > > + gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
> > > + gimple *popcount_stmt, *pattern_stmt;
> > > + tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
> > > + auto_vec<tree> vargs;
> > > +
> > > + /* Find B = (TYPE1) temp_out. */
> > > + if (!last_stmt)
> > > + return NULL;
> > > + tree_code code = gimple_assign_rhs_code (last_stmt);
> > > + if (!CONVERT_EXPR_CODE_P (code))
> > > + return NULL;
> > > +
> > > + lhs_oprnd = gimple_assign_lhs (last_stmt);
> > > + lhs_type = TREE_TYPE (lhs_oprnd);
> > > + if (TREE_CODE (lhs_type) != INTEGER_TYPE)
> > > + return NULL;
> >
> > INTEGRAL_TYPE_P
> >
> Changed.
> > > + rhs_oprnd = gimple_assign_rhs1 (last_stmt);
> > > + if (TREE_CODE (rhs_oprnd) != SSA_NAME
> > > + || !has_single_use (rhs_oprnd))
> > > + return NULL;
> > > + popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
> > > +
> > > + /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
> > > + if (!is_gimple_call (popcount_stmt)
> > > + || !gimple_call_lhs (popcount_stmt))
> >
> > Since you're arriving here via use-def chain the LHS will
> > never be NULL.
> >
Forgot to mention this part is also changed.
> > > + return NULL;
> > > + switch (gimple_call_combined_fn (popcount_stmt))
> > > + {
> > > + CASE_CFN_POPCOUNT:
> > > + break;
> > > + default:
> > > + return NULL;
> > > + }
> > > +
> >
> > for safety:
> >
> > if (gimple_call_num_args (popcount_stmt) != 1)
> > return NULL;
> >
> Changed.
> > > + rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
> > > + vect_unpromoted_value unprom_diff;
> > > + rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
> > > + &unprom_diff);
> > > +
> > > + if (!rhs_origin)
> > > + return NULL;
> > > +
> > > + /* Input and outout of .POPCOUNT should be same-precision integer.
> > > + Also A should be unsigned or same presion as temp_in,
> > > + otherwise there would be sign_extend from A to temp_in. */
> > > + if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
> > > + || !(TYPE_UNSIGNED (unprom_diff.type)
> > > + || (TYPE_PRECISION (unprom_diff.type)
> > > + == TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
> >
> > Note I find a if (A || !(B || C)) hard to read, please write if (A ||
> > (!B && !C)) instead.
> >
> Changed.
> > OK otherwise.
> >
> > Thanks,
> > Richard.
> >
> > > + return NULL;
> > > + vargs.safe_push (unprom_diff.op);
> > > +
> > > + vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
> > > + vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
> > > + /* Do it only the backend existed popcount<vector_mode>2. */
> > > + if (!direct_internal_fn_supported_p (IFN_POPCOUNT,
> > > + vec_type,
> > > + OPTIMIZE_FOR_SPEED))
> > > + return NULL;
> > > +
> > > + /* Create B = .POPCOUNT (A). */
> > > + new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> > > + pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
> > > + gimple_call_set_lhs (pattern_stmt, new_var);
> > > + gimple_set_location (pattern_stmt, gimple_location (last_stmt));
> > > + *type_out = vec_type;
> > > +
> > > + if (dump_enabled_p ())
> > > + dump_printf_loc (MSG_NOTE, vect_location,
> > > + "created pattern stmt: %G", pattern_stmt);
> > > + return pattern_stmt;
> > > +}
> > > +
> > > /* Function vect_recog_pow_pattern
> > >
> > > Try to find the following pattern:
> > > @@ -5283,6 +5392,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> > > { vect_recog_sad_pattern, "sad" },
> > > { vect_recog_widen_sum_pattern, "widen_sum" },
> > > { vect_recog_pow_pattern, "pow" },
> > > + { vect_recog_popcount_pattern, "popcount" },
> > > { vect_recog_widen_shift_pattern, "widen_shift" },
> > > { vect_recog_rotate_pattern, "rotate" },
> > > { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
> > > --
> > > 2.18.1
> > >
>
> Thanks for the review, here is the patch I'm checking in.
>
> --
> BR,
> Hongtao
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-06-22 2:39 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-17 6:29 [PATCH] Add vect_recog_popcount_pattern to handle mismatch between the vectorized popcount IFN and scalar popcount builtin liuhongt
2021-06-21 10:05 ` Richard Biener
2021-06-22 2:43 ` Hongtao Liu
2021-06-22 2:44 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).