public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Check the type of mask while generating cond_op in gimple simplication.
@ 2021-08-27  6:52 liuhongt
  2021-08-30 12:24 ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: liuhongt @ 2021-08-27  6:52 UTC (permalink / raw)
  To: gcc-patches; +Cc: hjl.tools, pinskia, richard.sandiford, richard.guenther

  When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
it doesn't check if mask type matches. It causes an ICE when expand cond_op
with mismatched mode.
  This patch add a function named cond_vectorized_internal_fn_supported_p
 to additionally check mask type than vectorized_internal_fn_supported_p.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

	PR middle-end/102080
	* internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
	* internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
	* match.pd: Check the type of mask while generating cond_op in
	gimple simplication.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
 gcc/internal-fn.h                        |  1 +
 gcc/match.pd                             | 24 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
 4 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 1360a00f0b9..8b2b65db1a7 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
   expand_internal_call (gimple_call_internal_fn (stmt), stmt);
 }
 
+/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
+   doesn't check if mask type matches.  */
+bool
+cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
+					 tree mask_type)
+{
+  if (!vectorized_internal_fn_supported_p (ifn, type))
+    return false;
+
+  machine_mode mask_mode;
+  machine_mode vmode = TYPE_MODE (type);
+  int size1, size2;
+  if (VECTOR_MODE_P (vmode)
+      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
+      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
+      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
+      && size1 != size2)
+    return false;
+
+  return true;
+}
+
 /* If TYPE is a vector type, return true if IFN is a direct internal
    function that is supported for that type.  If TYPE is a scalar type,
    return true if IFN is a direct internal function that is supported for
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..f0aea00103c 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
 extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
+extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
 
 #endif
diff --git a/gcc/match.pd b/gcc/match.pd
index e5bbb123a6a..72b1bc674db 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_BINARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
@@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_TERNARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..6a40a75e1c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,16 @@
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-08-27  6:52 [PATCH] Check the type of mask while generating cond_op in gimple simplication liuhongt
@ 2021-08-30 12:24 ` Richard Biener
  2021-08-31 10:24   ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2021-08-30 12:24 UTC (permalink / raw)
  To: liuhongt; +Cc: GCC Patches, H. J. Lu, Andrew Pinski, Richard Sandiford

On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>
>   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> it doesn't check if mask type matches. It causes an ICE when expand cond_op
> with mismatched mode.
>   This patch add a function named cond_vectorized_internal_fn_supported_p
>  to additionally check mask type than vectorized_internal_fn_supported_p.
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>   Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR middle-end/102080
>         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>         * match.pd: Check the type of mask while generating cond_op in
>         gimple simplication.
>
> gcc/testsuite/ChangeLog:
>
>         PR middle-end/102080
>         * gcc.target/i386/pr102080.c: New test.
> ---
>  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>  gcc/internal-fn.h                        |  1 +
>  gcc/match.pd                             | 24 ++++++++++++++++--------
>  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>  4 files changed, 55 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 1360a00f0b9..8b2b65db1a7 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>  }
>
> +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> +   doesn't check if mask type matches.  */
> +bool
> +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> +                                        tree mask_type)
> +{
> +  if (!vectorized_internal_fn_supported_p (ifn, type))
> +    return false;
> +
> +  machine_mode mask_mode;
> +  machine_mode vmode = TYPE_MODE (type);
> +  int size1, size2;
> +  if (VECTOR_MODE_P (vmode)
> +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> +      && size1 != size2)

Why do we check for equal size rather than just mode equality which
I think would work for non-constant sized modes as well?  And when
using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
GET_MODE_SIZE (TYPE_MODE (mask_type)))

Thanks,
Richard.

> +    return false;
> +
> +  return true;
> +}
> +
>  /* If TYPE is a vector type, return true if IFN is a direct internal
>     function that is supported for that type.  If TYPE is a scalar type,
>     return true if IFN is a direct internal function that is supported for
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..f0aea00103c 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
>  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
>
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
>
>  #endif
> diff --git a/gcc/match.pd b/gcc/match.pd
> index e5bbb123a6a..72b1bc674db 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       cond_op (COND_BINARY)
>   (simplify
>    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> -  (with { tree op_type = TREE_TYPE (@4); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@4);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> -  (with { tree op_type = TREE_TYPE (@4); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@4);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>
> @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       cond_op (COND_TERNARY)
>   (simplify
>    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> -  (with { tree op_type = TREE_TYPE (@5); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@5);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> -  (with { tree op_type = TREE_TYPE (@5); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@5);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op (bit_not @0) @2 @3 @4
>                   (view_convert:op_type @1)))))))
> diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> new file mode 100644
> index 00000000000..6a40a75e1c5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> @@ -0,0 +1,16 @@
> +#include<immintrin.h>
> +typedef float __m256 __attribute__((__vector_size__(32)));
> +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> +
> +void
> +__attribute__ ((target("avx")))
> +IfThenElse (__m256 no) {
> +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> +}
> +void
> +__attribute__ ((target("avx512vl")))
> +EncodedFromDisplay() {
> +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> +  IfThenElse(__trans_tmp_11);
> +}
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-08-30 12:24 ` Richard Biener
@ 2021-08-31 10:24   ` Hongtao Liu
  2021-08-31 11:56     ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2021-08-31 10:24 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, Richard Sandiford, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 7354 bytes --]

On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > with mismatched mode.
> >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         PR middle-end/102080
> >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >         * match.pd: Check the type of mask while generating cond_op in
> >         gimple simplication.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR middle-end/102080
> >         * gcc.target/i386/pr102080.c: New test.
> > ---
> >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >  gcc/internal-fn.h                        |  1 +
> >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >  4 files changed, 55 insertions(+), 8 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 1360a00f0b9..8b2b65db1a7 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >  }
> >
> > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > +   doesn't check if mask type matches.  */
> > +bool
> > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > +                                        tree mask_type)
> > +{
> > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > +    return false;
> > +
> > +  machine_mode mask_mode;
> > +  machine_mode vmode = TYPE_MODE (type);
> > +  int size1, size2;
> > +  if (VECTOR_MODE_P (vmode)
> > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > +      && size1 != size2)
>
> Why do we check for equal size rather than just mode equality which
I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
not QImode, Changed the patch to check mode equality.
Update patch.

> I think would work for non-constant sized modes as well?  And when
> using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> GET_MODE_SIZE (TYPE_MODE (mask_type)))
>
> Thanks,
> Richard.
>
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> >  /* If TYPE is a vector type, return true if IFN is a direct internal
> >     function that is supported for that type.  If TYPE is a scalar type,
> >     return true if IFN is a direct internal function that is supported for
> > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > index 19d0f849a5a..f0aea00103c 100644
> > --- a/gcc/internal-fn.h
> > +++ b/gcc/internal-fn.h
> > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> >
> >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> >
> >  #endif
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index e5bbb123a6a..72b1bc674db 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >       cond_op (COND_BINARY)
> >   (simplify
> >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > -  (with { tree op_type = TREE_TYPE (@4); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@4);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > -  (with { tree op_type = TREE_TYPE (@4); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@4);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >
> > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >       cond_op (COND_TERNARY)
> >   (simplify
> >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > -  (with { tree op_type = TREE_TYPE (@5); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@5);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > -  (with { tree op_type = TREE_TYPE (@5); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@5);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> >                   (view_convert:op_type @1)))))))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > new file mode 100644
> > index 00000000000..6a40a75e1c5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > @@ -0,0 +1,16 @@
> > +#include<immintrin.h>
> > +typedef float __m256 __attribute__((__vector_size__(32)));
> > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > +
> > +void
> > +__attribute__ ((target("avx")))
> > +IfThenElse (__m256 no) {
> > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > +}
> > +void
> > +__attribute__ ((target("avx512vl")))
> > +EncodedFromDisplay() {
> > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > +  IfThenElse(__trans_tmp_11);
> > +}
> > --
> > 2.18.1
> >



-- 
BR,
Hongtao

[-- Attachment #2: 0001-Check-the-type-of-mask-while-generating-cond_op-in-g.patch --]
[-- Type: text/x-patch, Size: 5654 bytes --]

From 5ace48ad0f30d234e3f2e25503a4f041138b4952 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Fri, 27 Aug 2021 12:50:13 +0800
Subject: [PATCH] Check the type of mask while generating cond_op in gimple
 simplication.

gcc/ChangeLog:

	PR middle-end/102080
	* internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
	* internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
	* match.pd: Check the type of mask while generating cond_op in
	gimple simplication.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/internal-fn.c                        | 20 ++++++++++++++++++++
 gcc/internal-fn.h                        |  1 +
 gcc/match.pd                             | 24 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
 4 files changed, 56 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 1360a00f0b9..7f643f545d9 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -4102,6 +4102,26 @@ expand_internal_call (gcall *stmt)
   expand_internal_call (gimple_call_internal_fn (stmt), stmt);
 }
 
+/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
+   doesn't check if mask type matches.  */
+bool
+cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
+					 tree mask_type)
+{
+  if (!vectorized_internal_fn_supported_p (ifn, type))
+    return false;
+
+  machine_mode mask_mode;
+  machine_mode vmode = TYPE_MODE (type);
+  int size1, size2;
+  if (VECTOR_MODE_P (vmode)
+      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
+      && mask_mode != TYPE_MODE (mask_type))
+    return false;
+
+  return true;
+}
+
 /* If TYPE is a vector type, return true if IFN is a direct internal
    function that is supported for that type.  If TYPE is a scalar type,
    return true if IFN is a direct internal function that is supported for
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..f0aea00103c 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
 extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
+extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
 
 #endif
diff --git a/gcc/match.pd b/gcc/match.pd
index f421c74b62c..ded68ec6c71 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6986,14 +6986,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_BINARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
@@ -7002,14 +7006,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_TERNARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..4c5ee32ee63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-08-31 10:24   ` Hongtao Liu
@ 2021-08-31 11:56     ` Richard Biener
  2021-09-01  6:33       ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2021-08-31 11:56 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, Richard Sandiford, GCC Patches

On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > with mismatched mode.
> > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > >
> > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >   Ok for trunk?
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR middle-end/102080
> > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > >         * match.pd: Check the type of mask while generating cond_op in
> > >         gimple simplication.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         PR middle-end/102080
> > >         * gcc.target/i386/pr102080.c: New test.
> > > ---
> > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > >  gcc/internal-fn.h                        |  1 +
> > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >
> > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > index 1360a00f0b9..8b2b65db1a7 100644
> > > --- a/gcc/internal-fn.c
> > > +++ b/gcc/internal-fn.c
> > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > >  }
> > >
> > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > +   doesn't check if mask type matches.  */
> > > +bool
> > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > +                                        tree mask_type)
> > > +{
> > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > +    return false;
> > > +
> > > +  machine_mode mask_mode;
> > > +  machine_mode vmode = TYPE_MODE (type);
> > > +  int size1, size2;
> > > +  if (VECTOR_MODE_P (vmode)
> > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > +      && size1 != size2)
> >
> > Why do we check for equal size rather than just mode equality which
> I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> not QImode, Changed the patch to check mode equality.
> Update patch.

Looking at all this it seems the match.pd patterns should have not
used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
which is equivalent here because we're always working with vector modes?

And then shouldn't we look at the actual optab whether the mask mode matches
the expectation rather than going around via the target hook which may not have
enough context to decide which mask mode to use?

In any case if the approach of the patch is correct shouldn't it do

  if (VECTOR_MODE_P (vmode)
      && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
             || mask_mode != TYPE_MODE (mask_type)))
    return false;

that is, not return true if there's no mask mode for the data mode?

Given the first observation should we call the function
direct_cond_internal_fn_supported_p () instead and as to the second
observation, look at the optab operands mode?

Richard.

> > I think would work for non-constant sized modes as well?  And when
> > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> >
> > Thanks,
> > Richard.
> >
> > > +    return false;
> > > +
> > > +  return true;
> > > +}
> > > +
> > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > >     function that is supported for that type.  If TYPE is a scalar type,
> > >     return true if IFN is a direct internal function that is supported for
> > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > index 19d0f849a5a..f0aea00103c 100644
> > > --- a/gcc/internal-fn.h
> > > +++ b/gcc/internal-fn.h
> > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > >
> > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > >
> > >  #endif
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index e5bbb123a6a..72b1bc674db 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >       cond_op (COND_BINARY)
> > >   (simplify
> > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@4);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@4);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > >
> > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >       cond_op (COND_TERNARY)
> > >   (simplify
> > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@5);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@5);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > >                   (view_convert:op_type @1)))))))
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > new file mode 100644
> > > index 00000000000..6a40a75e1c5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > @@ -0,0 +1,16 @@
> > > +#include<immintrin.h>
> > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > +
> > > +void
> > > +__attribute__ ((target("avx")))
> > > +IfThenElse (__m256 no) {
> > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > +}
> > > +void
> > > +__attribute__ ((target("avx512vl")))
> > > +EncodedFromDisplay() {
> > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > +  IfThenElse(__trans_tmp_11);
> > > +}
> > > --
> > > 2.18.1
> > >
>
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-08-31 11:56     ` Richard Biener
@ 2021-09-01  6:33       ` Hongtao Liu
  2021-09-01 11:14         ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2021-09-01  6:33 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, Richard Sandiford, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 9856 bytes --]

On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > > with mismatched mode.
> > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > > >
> > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > > >   Ok for trunk?
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         PR middle-end/102080
> > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > > >         * match.pd: Check the type of mask while generating cond_op in
> > > >         gimple simplication.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         PR middle-end/102080
> > > >         * gcc.target/i386/pr102080.c: New test.
> > > > ---
> > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > > >  gcc/internal-fn.h                        |  1 +
> > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > > >
> > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > > index 1360a00f0b9..8b2b65db1a7 100644
> > > > --- a/gcc/internal-fn.c
> > > > +++ b/gcc/internal-fn.c
> > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > > >  }
> > > >
> > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > > +   doesn't check if mask type matches.  */
> > > > +bool
> > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > > +                                        tree mask_type)
> > > > +{
> > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > > +    return false;
> > > > +
> > > > +  machine_mode mask_mode;
> > > > +  machine_mode vmode = TYPE_MODE (type);
> > > > +  int size1, size2;
> > > > +  if (VECTOR_MODE_P (vmode)
> > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > > +      && size1 != size2)
> > >
> > > Why do we check for equal size rather than just mode equality which
> > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > not QImode, Changed the patch to check mode equality.
> > Update patch.
>
> Looking at all this it seems the match.pd patterns should have not
> used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> which is equivalent here because we're always working with vector modes?
>
> And then shouldn't we look at the actual optab whether the mask mode matches
> the expectation rather than going around via the target hook which may not have
> enough context to decide which mask mode to use?
How about this?

+/* Return true if target supports cond_op with data TYPE and
+   mask MASK_TYPE.  */
+bool
+cond_internal_fn_supported_p (internal_fn ifn, tree type,
+       tree mask_type)
+{
+  tree_pair types = tree_pair (type, type);
+  optab tmp = direct_internal_fn_optab (ifn, types);
+  machine_mode vmode = TYPE_MODE (type);
+  insn_code icode = direct_optab_handler (tmp, vmode);
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  machine_mode mask_mode = TYPE_MODE (mask_type);
+  /* Can't create rtx and use insn_operand_matches here.  */
+  return insn_data[icode].operand[0].mode == vmode
+    && insn_data[icode].operand[1].mode == mask_mode;
+}
+

Update patch

>
> In any case if the approach of the patch is correct shouldn't it do
>
>   if (VECTOR_MODE_P (vmode)
>       && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>              || mask_mode != TYPE_MODE (mask_type)))
>     return false;
>
> that is, not return true if there's no mask mode for the data mode?
>
> Given the first observation should we call the function
> direct_cond_internal_fn_supported_p () instead and as to the second
> observation, look at the optab operands mode?
>
> Richard.
>
> > > I think would work for non-constant sized modes as well?  And when
> > > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > +    return false;
> > > > +
> > > > +  return true;
> > > > +}
> > > > +
> > > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > > >     function that is supported for that type.  If TYPE is a scalar type,
> > > >     return true if IFN is a direct internal function that is supported for
> > > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > > index 19d0f849a5a..f0aea00103c 100644
> > > > --- a/gcc/internal-fn.h
> > > > +++ b/gcc/internal-fn.h
> > > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > > >
> > > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > > >
> > > >  #endif
> > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > index e5bbb123a6a..72b1bc674db 100644
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > >       cond_op (COND_BINARY)
> > > >   (simplify
> > > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > > >   (simplify
> > > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > > >
> > > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > >       cond_op (COND_TERNARY)
> > > >   (simplify
> > > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > > >   (simplify
> > > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > > >                   (view_convert:op_type @1)))))))
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > new file mode 100644
> > > > index 00000000000..6a40a75e1c5
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > @@ -0,0 +1,16 @@
> > > > +#include<immintrin.h>
> > > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > > +
> > > > +void
> > > > +__attribute__ ((target("avx")))
> > > > +IfThenElse (__m256 no) {
> > > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > > +}
> > > > +void
> > > > +__attribute__ ((target("avx512vl")))
> > > > +EncodedFromDisplay() {
> > > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > > +  IfThenElse(__trans_tmp_11);
> > > > +}
> > > > --
> > > > 2.18.1
> > > >
> >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao

[-- Attachment #2: v3-0001-Check-the-type-of-mask-while-generating-cond_op-i.patch --]
[-- Type: text/x-patch, Size: 5663 bytes --]

From c9ea1ac434027a9fe0640e2be35fa676b77bc46e Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Fri, 27 Aug 2021 12:50:13 +0800
Subject: [PATCH v3] Check the type of mask while generating cond_op in gimple
 simplication.

gcc/ChangeLog:

	PR middle-end/102080
	* internal-fn.c (cond_internal_fn_supported_p): New functions.
	* internal-fn.h (cond_internal_fn_supported_p): New declaration.
	* match.pd: Check the type of mask while generating cond_op in
	gimple simplication.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/internal-fn.c                        | 19 +++++++++++++++++++
 gcc/internal-fn.h                        |  1 +
 gcc/match.pd                             | 24 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
 4 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 1360a00f0b9..371d0d8b186 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -4102,6 +4102,25 @@ expand_internal_call (gcall *stmt)
   expand_internal_call (gimple_call_internal_fn (stmt), stmt);
 }
 
+/* Return true if target support cond_op with data TYPE and
+   mask MASK_TYPE.  */
+bool
+cond_internal_fn_supported_p (internal_fn ifn, tree type,
+			      tree mask_type)
+{
+  tree_pair types = tree_pair (type, type);
+  optab tmp = direct_internal_fn_optab (ifn, types);
+  machine_mode vmode = TYPE_MODE (type);
+  insn_code icode = direct_optab_handler (tmp, vmode);
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  machine_mode mask_mode = TYPE_MODE (mask_type);
+  /* Can't create rtx and use insn_operand_matches here.  */
+  return insn_data[icode].operand[0].mode == vmode
+    && insn_data[icode].operand[1].mode == mask_mode;
+}
+
 /* If TYPE is a vector type, return true if IFN is a direct internal
    function that is supported for that type.  If TYPE is a scalar type,
    return true if IFN is a direct internal function that is supported for
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..c78d238cf08 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
 extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
+extern bool cond_internal_fn_supported_p (internal_fn, tree, tree);
 
 #endif
diff --git a/gcc/match.pd b/gcc/match.pd
index f421c74b62c..560e8f65a9b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6986,14 +6986,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_BINARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_internal_fn_supported_p (as_internal_fn (cond_op),
+				      op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
@@ -7002,14 +7006,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_TERNARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..4c5ee32ee63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-01  6:33       ` Hongtao Liu
@ 2021-09-01 11:14         ` Richard Biener
  2021-09-01 12:52           ` Richard Sandiford
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2021-09-01 11:14 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, Richard Sandiford, GCC Patches

On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > > >
> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > > > with mismatched mode.
> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > > > >
> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > > > >   Ok for trunk?
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         PR middle-end/102080
> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > > > >         * match.pd: Check the type of mask while generating cond_op in
> > > > >         gimple simplication.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         PR middle-end/102080
> > > > >         * gcc.target/i386/pr102080.c: New test.
> > > > > ---
> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > > > >  gcc/internal-fn.h                        |  1 +
> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > > > >
> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> > > > > --- a/gcc/internal-fn.c
> > > > > +++ b/gcc/internal-fn.c
> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > > > >  }
> > > > >
> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > > > +   doesn't check if mask type matches.  */
> > > > > +bool
> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > > > +                                        tree mask_type)
> > > > > +{
> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > > > +    return false;
> > > > > +
> > > > > +  machine_mode mask_mode;
> > > > > +  machine_mode vmode = TYPE_MODE (type);
> > > > > +  int size1, size2;
> > > > > +  if (VECTOR_MODE_P (vmode)
> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > > > +      && size1 != size2)
> > > >
> > > > Why do we check for equal size rather than just mode equality which
> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > > not QImode, Changed the patch to check mode equality.
> > > Update patch.
> >
> > Looking at all this it seems the match.pd patterns should have not
> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> > which is equivalent here because we're always working with vector modes?
> >
> > And then shouldn't we look at the actual optab whether the mask mode matches
> > the expectation rather than going around via the target hook which may not have
> > enough context to decide which mask mode to use?
> How about this?
>
> +/* Return true if target supports cond_op with data TYPE and
> +   mask MASK_TYPE.  */
> +bool
> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> +       tree mask_type)
> +{
> +  tree_pair types = tree_pair (type, type);
> +  optab tmp = direct_internal_fn_optab (ifn, types);
> +  machine_mode vmode = TYPE_MODE (type);
> +  insn_code icode = direct_optab_handler (tmp, vmode);
> +  if (icode == CODE_FOR_nothing)
> +    return false;
> +
> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> +  /* Can't create rtx and use insn_operand_matches here.  */
> +  return insn_data[icode].operand[0].mode == vmode
> +    && insn_data[icode].operand[1].mode == mask_mode;
> +}
> +

Yeah, sth like that, though the operand[0].mode test should be
redudnant.  I think we should assert or have a whiltelist
for the internal function we support to be queried this way.
Not sure if we can directly access the 'cond_binary/cond_ternary'
classification used in internal-fn.def, that would be best.

Richard, what are your thoughts about all this?

Thanks,
Richard.

> Update patch
>
> >
> > In any case if the approach of the patch is correct shouldn't it do
> >
> >   if (VECTOR_MODE_P (vmode)
> >       && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >              || mask_mode != TYPE_MODE (mask_type)))
> >     return false;
> >
> > that is, not return true if there's no mask mode for the data mode?
> >
> > Given the first observation should we call the function
> > direct_cond_internal_fn_supported_p () instead and as to the second
> > observation, look at the optab operands mode?
> >
> > Richard.
> >
> > > > I think would work for non-constant sized modes as well?  And when
> > > > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > > > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> > > >
> > > > Thanks,
> > > > Richard.
> > > >
> > > > > +    return false;
> > > > > +
> > > > > +  return true;
> > > > > +}
> > > > > +
> > > > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > > > >     function that is supported for that type.  If TYPE is a scalar type,
> > > > >     return true if IFN is a direct internal function that is supported for
> > > > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > > > index 19d0f849a5a..f0aea00103c 100644
> > > > > --- a/gcc/internal-fn.h
> > > > > +++ b/gcc/internal-fn.h
> > > > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > > > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > > > >
> > > > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > > > >
> > > > >  #endif
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > index e5bbb123a6a..72b1bc674db 100644
> > > > > --- a/gcc/match.pd
> > > > > +++ b/gcc/match.pd
> > > > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > >       cond_op (COND_BINARY)
> > > > >   (simplify
> > > > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > > > >   (simplify
> > > > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > > > >
> > > > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > >       cond_op (COND_TERNARY)
> > > > >   (simplify
> > > > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > > > >   (simplify
> > > > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > > > >                   (view_convert:op_type @1)))))))
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > > new file mode 100644
> > > > > index 00000000000..6a40a75e1c5
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > > @@ -0,0 +1,16 @@
> > > > > +#include<immintrin.h>
> > > > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > > > +
> > > > > +void
> > > > > +__attribute__ ((target("avx")))
> > > > > +IfThenElse (__m256 no) {
> > > > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > > > +}
> > > > > +void
> > > > > +__attribute__ ((target("avx512vl")))
> > > > > +EncodedFromDisplay() {
> > > > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > > > +  IfThenElse(__trans_tmp_11);
> > > > > +}
> > > > > --
> > > > > 2.18.1
> > > > >
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-01 11:14         ` Richard Biener
@ 2021-09-01 12:52           ` Richard Sandiford
  2021-09-01 13:33             ` Richard Biener
  2021-09-02  6:42             ` Hongtao Liu
  0 siblings, 2 replies; 14+ messages in thread
From: Richard Sandiford @ 2021-09-01 12:52 UTC (permalink / raw)
  To: Richard Biener; +Cc: Hongtao Liu, liuhongt, GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>>
>> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
>> <richard.guenther@gmail.com> wrote:
>> >
>> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>> > >
>> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
>> > > <gcc-patches@gcc.gnu.org> wrote:
>> > > >
>> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>> > > > >
>> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
>> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
>> > > > > with mismatched mode.
>> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
>> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
>> > > > >
>> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>> > > > >   Ok for trunk?
>> > > > >
>> > > > > gcc/ChangeLog:
>> > > > >
>> > > > >         PR middle-end/102080
>> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>> > > > >         * match.pd: Check the type of mask while generating cond_op in
>> > > > >         gimple simplication.
>> > > > >
>> > > > > gcc/testsuite/ChangeLog:
>> > > > >
>> > > > >         PR middle-end/102080
>> > > > >         * gcc.target/i386/pr102080.c: New test.
>> > > > > ---
>> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>> > > > >  gcc/internal-fn.h                        |  1 +
>> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
>> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
>> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>> > > > >
>> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> > > > > index 1360a00f0b9..8b2b65db1a7 100644
>> > > > > --- a/gcc/internal-fn.c
>> > > > > +++ b/gcc/internal-fn.c
>> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>> > > > >  }
>> > > > >
>> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
>> > > > > +   doesn't check if mask type matches.  */
>> > > > > +bool
>> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
>> > > > > +                                        tree mask_type)
>> > > > > +{
>> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
>> > > > > +    return false;
>> > > > > +
>> > > > > +  machine_mode mask_mode;
>> > > > > +  machine_mode vmode = TYPE_MODE (type);
>> > > > > +  int size1, size2;
>> > > > > +  if (VECTOR_MODE_P (vmode)
>> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
>> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
>> > > > > +      && size1 != size2)
>> > > >
>> > > > Why do we check for equal size rather than just mode equality which
>> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
>> > > not QImode, Changed the patch to check mode equality.
>> > > Update patch.
>> >
>> > Looking at all this it seems the match.pd patterns should have not
>> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
>> > which is equivalent here because we're always working with vector modes?

Yeah, looks like it.

>> > And then shouldn't we look at the actual optab whether the mask mode matches
>> > the expectation rather than going around via the target hook which may not have
>> > enough context to decide which mask mode to use?
>> How about this?
>>
>> +/* Return true if target supports cond_op with data TYPE and
>> +   mask MASK_TYPE.  */
>> +bool
>> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
>> +       tree mask_type)
>> +{
>> +  tree_pair types = tree_pair (type, type);
>> +  optab tmp = direct_internal_fn_optab (ifn, types);
>> +  machine_mode vmode = TYPE_MODE (type);
>> +  insn_code icode = direct_optab_handler (tmp, vmode);
>> +  if (icode == CODE_FOR_nothing)
>> +    return false;
>> +
>> +  machine_mode mask_mode = TYPE_MODE (mask_type);
>> +  /* Can't create rtx and use insn_operand_matches here.  */
>> +  return insn_data[icode].operand[0].mode == vmode
>> +    && insn_data[icode].operand[1].mode == mask_mode;
>> +}
>> +
>
> Yeah, sth like that, though the operand[0].mode test should be
> redudnant.  I think we should assert or have a whiltelist
> for the internal function we support to be queried this way.
> Not sure if we can directly access the 'cond_binary/cond_ternary'
> classification used in internal-fn.def, that would be best.
>
> Richard, what are your thoughts about all this?

IMO using get_mask_mode was right.  The optab documentation says:

  Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
  integer if @var{m} is scalar, otherwise it has the mode returned by
  @code{TARGET_VECTORIZE_GET_MASK_MODE}.

Allowing targets to use optabs to enforce different mask modes for
different operations would open up a mess of combinations.

In other words, I think cond_vectorized_internal_fn_supported_p
is really testing two things:

(1) is the mask type/vector type combination well-formed?
(2) is the internal function supported for the vector type?

where (1) is a gimple question and (2) is a target question.

I guess there's an argument that (1) should be part of the match.pd
condition instead, alongside the element_precision check.  That would
add to the cut-&-paste though. :-(

Alternatively, I guess we would add:

  bool is_truth_type_for (tree type, tree truth_type);

to return true if truth_type is equal to truth_type_for (type)
(but without having to call truth_type_for).  We could then use:

  is_truth_type_for (op_type, TREE_TYPE (@0))

instead of:

  element_precision (type) == element_precision (op_type)

since it should be a strictly stronger condition.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-01 12:52           ` Richard Sandiford
@ 2021-09-01 13:33             ` Richard Biener
  2021-09-02  6:42             ` Hongtao Liu
  1 sibling, 0 replies; 14+ messages in thread
From: Richard Biener @ 2021-09-01 13:33 UTC (permalink / raw)
  To: Richard Biener, Hongtao Liu, liuhongt, GCC Patches, Richard Sandiford

On Wed, Sep 1, 2021 at 2:52 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >>
> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >> >
> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> > >
> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> > > >
> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> > > > >
> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> > > > > with mismatched mode.
> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> > > > >
> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> > > > >   Ok for trunk?
> >> > > > >
> >> > > > > gcc/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> > > > >         gimple simplication.
> >> > > > >
> >> > > > > gcc/testsuite/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> > > > > ---
> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> > > > >
> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> > > > > --- a/gcc/internal-fn.c
> >> > > > > +++ b/gcc/internal-fn.c
> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> > > > >  }
> >> > > > >
> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> > > > > +   doesn't check if mask type matches.  */
> >> > > > > +bool
> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> > > > > +                                        tree mask_type)
> >> > > > > +{
> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> > > > > +    return false;
> >> > > > > +
> >> > > > > +  machine_mode mask_mode;
> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> > > > > +  int size1, size2;
> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> > > > > +      && size1 != size2)
> >> > > >
> >> > > > Why do we check for equal size rather than just mode equality which
> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> > > not QImode, Changed the patch to check mode equality.
> >> > > Update patch.
> >> >
> >> > Looking at all this it seems the match.pd patterns should have not
> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> > which is equivalent here because we're always working with vector modes?
>
> Yeah, looks like it.
>
> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> > the expectation rather than going around via the target hook which may not have
> >> > enough context to decide which mask mode to use?
> >> How about this?
> >>
> >> +/* Return true if target supports cond_op with data TYPE and
> >> +   mask MASK_TYPE.  */
> >> +bool
> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> +       tree mask_type)
> >> +{
> >> +  tree_pair types = tree_pair (type, type);
> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> +  machine_mode vmode = TYPE_MODE (type);
> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> +  if (icode == CODE_FOR_nothing)
> >> +    return false;
> >> +
> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> +  return insn_data[icode].operand[0].mode == vmode
> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> +}
> >> +
> >
> > Yeah, sth like that, though the operand[0].mode test should be
> > redudnant.  I think we should assert or have a whiltelist
> > for the internal function we support to be queried this way.
> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > classification used in internal-fn.def, that would be best.
> >
> > Richard, what are your thoughts about all this?
>
> IMO using get_mask_mode was right.  The optab documentation says:
>
>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>   integer if @var{m} is scalar, otherwise it has the mode returned by
>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>
> Allowing targets to use optabs to enforce different mask modes for
> different operations would open up a mess of combinations.

Meh!  And I thought this was the way out of my AVX512 vs AVX2 mask
mess with mask_gather_load ...

> In other words, I think cond_vectorized_internal_fn_supported_p
> is really testing two things:
>
> (1) is the mask type/vector type combination well-formed?

But can we always determine this without looking at the actual
operation?  That is, do we force targets to be consistent here
when writing their machine description and supporting more
than one mask variant?

> (2) is the internal function supported for the vector type?
>
> where (1) is a gimple question and (2) is a target question.
>
> I guess there's an argument that (1) should be part of the match.pd
> condition instead, alongside the element_precision check.  That would
> add to the cut-&-paste though. :-(
>
> Alternatively, I guess we would add:
>
>   bool is_truth_type_for (tree type, tree truth_type);
>
> to return true if truth_type is equal to truth_type_for (type)
> (but without having to call truth_type_for).  We could then use:
>
>   is_truth_type_for (op_type, TREE_TYPE (@0))
>
> instead of:
>
>   element_precision (type) == element_precision (op_type)
>
> since it should be a strictly stronger condition.

I guess that would work, yes.  Won't exactly help me with
mask_gather_load but for the cond_* IFNs it's certainly good.

Thanks,
Richard.

>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-01 12:52           ` Richard Sandiford
  2021-09-01 13:33             ` Richard Biener
@ 2021-09-02  6:42             ` Hongtao Liu
  2021-09-02 17:54               ` Richard Sandiford
  1 sibling, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2021-09-02  6:42 UTC (permalink / raw)
  To: Richard Biener, Hongtao Liu, liuhongt, GCC Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 6756 bytes --]

On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >>
> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >> >
> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> > >
> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> > > >
> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> > > > >
> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> > > > > with mismatched mode.
> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> > > > >
> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> > > > >   Ok for trunk?
> >> > > > >
> >> > > > > gcc/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> > > > >         gimple simplication.
> >> > > > >
> >> > > > > gcc/testsuite/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> > > > > ---
> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> > > > >
> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> > > > > --- a/gcc/internal-fn.c
> >> > > > > +++ b/gcc/internal-fn.c
> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> > > > >  }
> >> > > > >
> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> > > > > +   doesn't check if mask type matches.  */
> >> > > > > +bool
> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> > > > > +                                        tree mask_type)
> >> > > > > +{
> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> > > > > +    return false;
> >> > > > > +
> >> > > > > +  machine_mode mask_mode;
> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> > > > > +  int size1, size2;
> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> > > > > +      && size1 != size2)
> >> > > >
> >> > > > Why do we check for equal size rather than just mode equality which
> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> > > not QImode, Changed the patch to check mode equality.
> >> > > Update patch.
> >> >
> >> > Looking at all this it seems the match.pd patterns should have not
> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> > which is equivalent here because we're always working with vector modes?
>
> Yeah, looks like it.
>
> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> > the expectation rather than going around via the target hook which may not have
> >> > enough context to decide which mask mode to use?
> >> How about this?
> >>
> >> +/* Return true if target supports cond_op with data TYPE and
> >> +   mask MASK_TYPE.  */
> >> +bool
> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> +       tree mask_type)
> >> +{
> >> +  tree_pair types = tree_pair (type, type);
> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> +  machine_mode vmode = TYPE_MODE (type);
> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> +  if (icode == CODE_FOR_nothing)
> >> +    return false;
> >> +
> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> +  return insn_data[icode].operand[0].mode == vmode
> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> +}
> >> +
> >
> > Yeah, sth like that, though the operand[0].mode test should be
> > redudnant.  I think we should assert or have a whiltelist
> > for the internal function we support to be queried this way.
> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > classification used in internal-fn.def, that would be best.
> >
> > Richard, what are your thoughts about all this?
>
> IMO using get_mask_mode was right.  The optab documentation says:
>
>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>   integer if @var{m} is scalar, otherwise it has the mode returned by
>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>
> Allowing targets to use optabs to enforce different mask modes for
> different operations would open up a mess of combinations.
>
> In other words, I think cond_vectorized_internal_fn_supported_p
> is really testing two things:
>
> (1) is the mask type/vector type combination well-formed?
> (2) is the internal function supported for the vector type?
>
> where (1) is a gimple question and (2) is a target question.
>
> I guess there's an argument that (1) should be part of the match.pd
> condition instead, alongside the element_precision check.  That would
> add to the cut-&-paste though. :-(
>
> Alternatively, I guess we would add:
>
>   bool is_truth_type_for (tree type, tree truth_type);
>
> to return true if truth_type is equal to truth_type_for (type)
> (but without having to call truth_type_for).  We could then use:
>
>   is_truth_type_for (op_type, TREE_TYPE (@0))
>
> instead of:
>
>   element_precision (type) == element_precision (op_type)
>
> since it should be a strictly stronger condition.
Thanks for your advice, it sounds more reasonable.
Here is the updated patch.
>
> Thanks,
> Richard



-- 
BR,
Hongtao

[-- Attachment #2: v4-0001-Check-mask-type-when-doing-cond_op-related-gimple.patch --]
[-- Type: text/x-patch, Size: 4497 bytes --]

From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 2 Sep 2021 13:05:54 +0800
Subject: [PATCH v4] Check mask type when doing cond_op related gimple
 simplification.

gcc/ChangeLog:

	PR middle-end/102080
	* match.pd: Check mask type when doing cond_op related gimple
	simplification.
	* tree.c (is_truth_type_for): New function.
	* tree.h (is_truth_type_for): New declaration.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/match.pd                             |  8 ++++----
 gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
 gcc/tree.c                               |  7 +++++++
 gcc/tree.h                               |  1 +
 4 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/match.pd b/gcc/match.pd
index f421c74b62c..c9a27f46ed2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
 /* Same for ternary operations.  */
@@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..4c5ee32ee63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
diff --git a/gcc/tree.c b/gcc/tree.c
index cba3bca41b3..88c2221eabb 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10723,6 +10723,13 @@ signed_type_for (tree type)
   return signed_or_unsigned_type_for (0, type);
 }
 
+bool
+is_truth_type_for (tree type, tree truth_type)
+{
+  tree tmp = truth_type_for (type);
+  return tmp == truth_type;
+}
+
 /* If TYPE is a vector type, return a signed integer vector type with the
    same width and number of subparts. Otherwise return boolean_type_node.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 060a41f6991..c8542bfd476 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
 extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
+bool is_truth_type_for (tree, tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
-- 
2.18.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-02  6:42             ` Hongtao Liu
@ 2021-09-02 17:54               ` Richard Sandiford
  2021-09-06  6:54                 ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Sandiford @ 2021-09-02 17:54 UTC (permalink / raw)
  To: Hongtao Liu via Gcc-patches; +Cc: Richard Biener, Hongtao Liu, liuhongt

Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Richard Biener <richard.guenther@gmail.com> writes:
>> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>> >>
>> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
>> >> <richard.guenther@gmail.com> wrote:
>> >> >
>> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>> >> > >
>> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
>> >> > > <gcc-patches@gcc.gnu.org> wrote:
>> >> > > >
>> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>> >> > > > >
>> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
>> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
>> >> > > > > with mismatched mode.
>> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
>> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
>> >> > > > >
>> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>> >> > > > >   Ok for trunk?
>> >> > > > >
>> >> > > > > gcc/ChangeLog:
>> >> > > > >
>> >> > > > >         PR middle-end/102080
>> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
>> >> > > > >         gimple simplication.
>> >> > > > >
>> >> > > > > gcc/testsuite/ChangeLog:
>> >> > > > >
>> >> > > > >         PR middle-end/102080
>> >> > > > >         * gcc.target/i386/pr102080.c: New test.
>> >> > > > > ---
>> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>> >> > > > >  gcc/internal-fn.h                        |  1 +
>> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
>> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
>> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>> >> > > > >
>> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
>> >> > > > > --- a/gcc/internal-fn.c
>> >> > > > > +++ b/gcc/internal-fn.c
>> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>> >> > > > >  }
>> >> > > > >
>> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
>> >> > > > > +   doesn't check if mask type matches.  */
>> >> > > > > +bool
>> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
>> >> > > > > +                                        tree mask_type)
>> >> > > > > +{
>> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
>> >> > > > > +    return false;
>> >> > > > > +
>> >> > > > > +  machine_mode mask_mode;
>> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
>> >> > > > > +  int size1, size2;
>> >> > > > > +  if (VECTOR_MODE_P (vmode)
>> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
>> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
>> >> > > > > +      && size1 != size2)
>> >> > > >
>> >> > > > Why do we check for equal size rather than just mode equality which
>> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
>> >> > > not QImode, Changed the patch to check mode equality.
>> >> > > Update patch.
>> >> >
>> >> > Looking at all this it seems the match.pd patterns should have not
>> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
>> >> > which is equivalent here because we're always working with vector modes?
>>
>> Yeah, looks like it.
>>
>> >> > And then shouldn't we look at the actual optab whether the mask mode matches
>> >> > the expectation rather than going around via the target hook which may not have
>> >> > enough context to decide which mask mode to use?
>> >> How about this?
>> >>
>> >> +/* Return true if target supports cond_op with data TYPE and
>> >> +   mask MASK_TYPE.  */
>> >> +bool
>> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
>> >> +       tree mask_type)
>> >> +{
>> >> +  tree_pair types = tree_pair (type, type);
>> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
>> >> +  machine_mode vmode = TYPE_MODE (type);
>> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
>> >> +  if (icode == CODE_FOR_nothing)
>> >> +    return false;
>> >> +
>> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
>> >> +  /* Can't create rtx and use insn_operand_matches here.  */
>> >> +  return insn_data[icode].operand[0].mode == vmode
>> >> +    && insn_data[icode].operand[1].mode == mask_mode;
>> >> +}
>> >> +
>> >
>> > Yeah, sth like that, though the operand[0].mode test should be
>> > redudnant.  I think we should assert or have a whiltelist
>> > for the internal function we support to be queried this way.
>> > Not sure if we can directly access the 'cond_binary/cond_ternary'
>> > classification used in internal-fn.def, that would be best.
>> >
>> > Richard, what are your thoughts about all this?
>>
>> IMO using get_mask_mode was right.  The optab documentation says:
>>
>>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>>   integer if @var{m} is scalar, otherwise it has the mode returned by
>>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>>
>> Allowing targets to use optabs to enforce different mask modes for
>> different operations would open up a mess of combinations.
>>
>> In other words, I think cond_vectorized_internal_fn_supported_p
>> is really testing two things:
>>
>> (1) is the mask type/vector type combination well-formed?
>> (2) is the internal function supported for the vector type?
>>
>> where (1) is a gimple question and (2) is a target question.
>>
>> I guess there's an argument that (1) should be part of the match.pd
>> condition instead, alongside the element_precision check.  That would
>> add to the cut-&-paste though. :-(
>>
>> Alternatively, I guess we would add:
>>
>>   bool is_truth_type_for (tree type, tree truth_type);
>>
>> to return true if truth_type is equal to truth_type_for (type)
>> (but without having to call truth_type_for).  We could then use:
>>
>>   is_truth_type_for (op_type, TREE_TYPE (@0))
>>
>> instead of:
>>
>>   element_precision (type) == element_precision (op_type)
>>
>> since it should be a strictly stronger condition.
> Thanks for your advice, it sounds more reasonable.
> Here is the updated patch.
>>
>> Thanks,
>> Richard
>
>
>
> -- 
> BR,
> Hongtao
>
> From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> From: liuhongt <hongtao.liu@intel.com>
> Date: Thu, 2 Sep 2021 13:05:54 +0800
> Subject: [PATCH v4] Check mask type when doing cond_op related gimple
>  simplification.
>
> gcc/ChangeLog:
>
> 	PR middle-end/102080
> 	* match.pd: Check mask type when doing cond_op related gimple
> 	simplification.
> 	* tree.c (is_truth_type_for): New function.
> 	* tree.h (is_truth_type_for): New declaration.
>
> gcc/testsuite/ChangeLog:
>
> 	PR middle-end/102080
> 	* gcc.target/i386/pr102080.c: New test.
> ---
>  gcc/match.pd                             |  8 ++++----
>  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
>  gcc/tree.c                               |  7 +++++++
>  gcc/tree.h                               |  1 +
>  4 files changed, 31 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f421c74b62c..c9a27f46ed2 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))

Why pass “type” rather than “op_type”?  “type” is the type of the
original expression, and if the original vec_cond is well-formed,
@0 should already have the right truth type for “type”.  Here we're
trying to convert the expression into a conditional operation on
“op_type”, so we need to test whether that's valid.

>      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>  
>  /* Same for ternary operations.  */
> @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 @4
>  		  (view_convert:op_type @1)))))))
>  #endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> new file mode 100644
> index 00000000000..4c5ee32ee63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include<immintrin.h>
> +typedef float __m256 __attribute__((__vector_size__(32)));
> +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> +
> +void
> +__attribute__ ((target("avx")))
> +IfThenElse (__m256 no) {
> +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> +}
> +void
> +__attribute__ ((target("avx512vl")))
> +EncodedFromDisplay() {
> +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> +  IfThenElse(__trans_tmp_11);
> +}
> diff --git a/gcc/tree.c b/gcc/tree.c
> index cba3bca41b3..88c2221eabb 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
>    return signed_or_unsigned_type_for (0, type);
>  }
>  
> +bool
> +is_truth_type_for (tree type, tree truth_type)
> +{
> +  tree tmp = truth_type_for (type);
> +  return tmp == truth_type;
> +}

The idea was to try to avoid calling truth_type to create a type.
Instead we can use similar logic to truth_type to tell whether type
has the right form.  I think the rules are:

- For VECTOR_TYPEs:
  - The truth type must be a VECTOR_BOOLEAN_TYPE.
  - The number of elements must match (known_eq).
  - Also:
    - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
      exists, the truth type must have that mode.
    - Otherwise, the types must have the same size.
- Otherwise, the truth type must be a BOOLEAN_TYPE.

(Richi please correct me if I'm wrong.)

Thanks,
Richard


> +
>  /* If TYPE is a vector type, return a signed integer vector type with the
>     same width and number of subparts. Otherwise return boolean_type_node.  */
>  
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 060a41f6991..c8542bfd476 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
>  extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
> +bool is_truth_type_for (tree, tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-02 17:54               ` Richard Sandiford
@ 2021-09-06  6:54                 ` Richard Biener
  2021-09-06  9:01                   ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2021-09-06  6:54 UTC (permalink / raw)
  To: Hongtao Liu via Gcc-patches, Richard Biener, Hongtao Liu,
	liuhongt, Richard Sandiford

On Thu, Sep 2, 2021 at 7:54 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Richard Biener <richard.guenther@gmail.com> writes:
> >> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >> >>
> >> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> >> <richard.guenther@gmail.com> wrote:
> >> >> >
> >> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> >> > >
> >> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> >> > > >
> >> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> >> > > > >
> >> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> >> > > > > with mismatched mode.
> >> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> >> > > > >
> >> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> >> > > > >   Ok for trunk?
> >> >> > > > >
> >> >> > > > > gcc/ChangeLog:
> >> >> > > > >
> >> >> > > > >         PR middle-end/102080
> >> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> >> > > > >         gimple simplication.
> >> >> > > > >
> >> >> > > > > gcc/testsuite/ChangeLog:
> >> >> > > > >
> >> >> > > > >         PR middle-end/102080
> >> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> >> > > > > ---
> >> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> >> > > > >
> >> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> >> > > > > --- a/gcc/internal-fn.c
> >> >> > > > > +++ b/gcc/internal-fn.c
> >> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> >> > > > >  }
> >> >> > > > >
> >> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> >> > > > > +   doesn't check if mask type matches.  */
> >> >> > > > > +bool
> >> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> >> > > > > +                                        tree mask_type)
> >> >> > > > > +{
> >> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> >> > > > > +    return false;
> >> >> > > > > +
> >> >> > > > > +  machine_mode mask_mode;
> >> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> >> > > > > +  int size1, size2;
> >> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> >> > > > > +      && size1 != size2)
> >> >> > > >
> >> >> > > > Why do we check for equal size rather than just mode equality which
> >> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> >> > > not QImode, Changed the patch to check mode equality.
> >> >> > > Update patch.
> >> >> >
> >> >> > Looking at all this it seems the match.pd patterns should have not
> >> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> >> > which is equivalent here because we're always working with vector modes?
> >>
> >> Yeah, looks like it.
> >>
> >> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> >> > the expectation rather than going around via the target hook which may not have
> >> >> > enough context to decide which mask mode to use?
> >> >> How about this?
> >> >>
> >> >> +/* Return true if target supports cond_op with data TYPE and
> >> >> +   mask MASK_TYPE.  */
> >> >> +bool
> >> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> >> +       tree mask_type)
> >> >> +{
> >> >> +  tree_pair types = tree_pair (type, type);
> >> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> >> +  machine_mode vmode = TYPE_MODE (type);
> >> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> >> +  if (icode == CODE_FOR_nothing)
> >> >> +    return false;
> >> >> +
> >> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> >> +  return insn_data[icode].operand[0].mode == vmode
> >> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> >> +}
> >> >> +
> >> >
> >> > Yeah, sth like that, though the operand[0].mode test should be
> >> > redudnant.  I think we should assert or have a whiltelist
> >> > for the internal function we support to be queried this way.
> >> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> >> > classification used in internal-fn.def, that would be best.
> >> >
> >> > Richard, what are your thoughts about all this?
> >>
> >> IMO using get_mask_mode was right.  The optab documentation says:
> >>
> >>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
> >>   integer if @var{m} is scalar, otherwise it has the mode returned by
> >>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
> >>
> >> Allowing targets to use optabs to enforce different mask modes for
> >> different operations would open up a mess of combinations.
> >>
> >> In other words, I think cond_vectorized_internal_fn_supported_p
> >> is really testing two things:
> >>
> >> (1) is the mask type/vector type combination well-formed?
> >> (2) is the internal function supported for the vector type?
> >>
> >> where (1) is a gimple question and (2) is a target question.
> >>
> >> I guess there's an argument that (1) should be part of the match.pd
> >> condition instead, alongside the element_precision check.  That would
> >> add to the cut-&-paste though. :-(
> >>
> >> Alternatively, I guess we would add:
> >>
> >>   bool is_truth_type_for (tree type, tree truth_type);
> >>
> >> to return true if truth_type is equal to truth_type_for (type)
> >> (but without having to call truth_type_for).  We could then use:
> >>
> >>   is_truth_type_for (op_type, TREE_TYPE (@0))
> >>
> >> instead of:
> >>
> >>   element_precision (type) == element_precision (op_type)
> >>
> >> since it should be a strictly stronger condition.
> > Thanks for your advice, it sounds more reasonable.
> > Here is the updated patch.
> >>
> >> Thanks,
> >> Richard
> >
> >
> >
> > --
> > BR,
> > Hongtao
> >
> > From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> > From: liuhongt <hongtao.liu@intel.com>
> > Date: Thu, 2 Sep 2021 13:05:54 +0800
> > Subject: [PATCH v4] Check mask type when doing cond_op related gimple
> >  simplification.
> >
> > gcc/ChangeLog:
> >
> >       PR middle-end/102080
> >       * match.pd: Check mask type when doing cond_op related gimple
> >       simplification.
> >       * tree.c (is_truth_type_for): New function.
> >       * tree.h (is_truth_type_for): New declaration.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       PR middle-end/102080
> >       * gcc.target/i386/pr102080.c: New test.
> > ---
> >  gcc/match.pd                             |  8 ++++----
> >  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
> >  gcc/tree.c                               |  7 +++++++
> >  gcc/tree.h                               |  1 +
> >  4 files changed, 31 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index f421c74b62c..c9a27f46ed2 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> >    (with { tree op_type = TREE_TYPE (@4); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
>
> Why pass “type” rather than “op_type”?  “type” is the type of the
> original expression, and if the original vec_cond is well-formed,
> @0 should already have the right truth type for “type”.  Here we're
> trying to convert the expression into a conditional operation on
> “op_type”, so we need to test whether that's valid.
>
> >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> >    (with { tree op_type = TREE_TYPE (@4); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >
> >  /* Same for ternary operations.  */
> > @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> >    (with { tree op_type = TREE_TYPE (@5); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> >    (with { tree op_type = TREE_TYPE (@5); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> >                 (view_convert:op_type @1)))))))
> >  #endif
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > new file mode 100644
> > index 00000000000..4c5ee32ee63
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include<immintrin.h>
> > +typedef float __m256 __attribute__((__vector_size__(32)));
> > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > +
> > +void
> > +__attribute__ ((target("avx")))
> > +IfThenElse (__m256 no) {
> > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > +}
> > +void
> > +__attribute__ ((target("avx512vl")))
> > +EncodedFromDisplay() {
> > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > +  IfThenElse(__trans_tmp_11);
> > +}
> > diff --git a/gcc/tree.c b/gcc/tree.c
> > index cba3bca41b3..88c2221eabb 100644
> > --- a/gcc/tree.c
> > +++ b/gcc/tree.c
> > @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
> >    return signed_or_unsigned_type_for (0, type);
> >  }
> >
> > +bool
> > +is_truth_type_for (tree type, tree truth_type)
> > +{
> > +  tree tmp = truth_type_for (type);
> > +  return tmp == truth_type;
> > +}
>
> The idea was to try to avoid calling truth_type to create a type.
> Instead we can use similar logic to truth_type to tell whether type
> has the right form.  I think the rules are:
>
> - For VECTOR_TYPEs:
>   - The truth type must be a VECTOR_BOOLEAN_TYPE.
>   - The number of elements must match (known_eq).
>   - Also:
>     - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
>       exists, the truth type must have that mode.
>     - Otherwise, the types must have the same size.

For vector types it's

  VECTOR_BOOLEAN_TYPE_P (truth_type)
  && known_eq (number of elements)
  && targetm.vectorize.get_mask_mode (TYPE_MODE (type)) == TYPE_MODE
(truth_type)

I think we're only interested in the case that TYPE_MODE (type) is a
vector mode given
we're doing optab checks.  If we want to cover !VECTOR_MODE vector type 'type'
then it would be known_eq (component sizes) I think.

Not sure if we need to test for !TYPE_UNSIGNED (truth_type).

> - Otherwise, the truth type must be a BOOLEAN_TYPE.

I'm not sure which case you're refering to here - when 'type' is a scalar type?
I suppose we'd want to allow all types that trivially convert to
boolean_type_node
then which means useless_type_conversion_p (boolean_type_node, truth_type).

> (Richi please correct me if I'm wrong.)
>
> Thanks,
> Richard
>
>
> > +
> >  /* If TYPE is a vector type, return a signed integer vector type with the
> >     same width and number of subparts. Otherwise return boolean_type_node.  */
> >
> > diff --git a/gcc/tree.h b/gcc/tree.h
> > index 060a41f6991..c8542bfd476 100644
> > --- a/gcc/tree.h
> > +++ b/gcc/tree.h
> > @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
> >  extern tree signed_or_unsigned_type_for (int, tree);
> >  extern tree signed_type_for (tree);
> >  extern tree unsigned_type_for (tree);
> > +bool is_truth_type_for (tree, tree);
> >  extern tree truth_type_for (tree);
> >  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
> >  extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check the type of mask while generating cond_op in gimple simplication.
  2021-09-06  6:54                 ` Richard Biener
@ 2021-09-06  9:01                   ` Hongtao Liu
  2021-09-16  6:27                     ` [PATCH] Check mask type when doing cond_op related gimple simplification liuhongt
  0 siblings, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2021-09-06  9:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: Hongtao Liu via Gcc-patches, liuhongt, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 15236 bytes --]

On Mon, Sep 6, 2021 at 2:54 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Sep 2, 2021 at 7:54 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > > On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> > > <richard.sandiford@arm.com> wrote:
> > >>
> > >> Richard Biener <richard.guenther@gmail.com> writes:
> > >> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >> >>
> > >> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> > >> >> <richard.guenther@gmail.com> wrote:
> > >> >> >
> > >> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >> >> > >
> > >> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > >> >> > > <gcc-patches@gcc.gnu.org> wrote:
> > >> >> > > >
> > >> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >> >> > > > >
> > >> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > >> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > >> >> > > > > with mismatched mode.
> > >> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > >> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > >> >> > > > >
> > >> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >> >> > > > >   Ok for trunk?
> > >> >> > > > >
> > >> >> > > > > gcc/ChangeLog:
> > >> >> > > > >
> > >> >> > > > >         PR middle-end/102080
> > >> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > >> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > >> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> > >> >> > > > >         gimple simplication.
> > >> >> > > > >
> > >> >> > > > > gcc/testsuite/ChangeLog:
> > >> >> > > > >
> > >> >> > > > >         PR middle-end/102080
> > >> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> > >> >> > > > > ---
> > >> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > >> >> > > > >  gcc/internal-fn.h                        |  1 +
> > >> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > >> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > >> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > >> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >> >> > > > >
> > >> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > >> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> > >> >> > > > > --- a/gcc/internal-fn.c
> > >> >> > > > > +++ b/gcc/internal-fn.c
> > >> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > >> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > >> >> > > > >  }
> > >> >> > > > >
> > >> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > >> >> > > > > +   doesn't check if mask type matches.  */
> > >> >> > > > > +bool
> > >> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > >> >> > > > > +                                        tree mask_type)
> > >> >> > > > > +{
> > >> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > >> >> > > > > +    return false;
> > >> >> > > > > +
> > >> >> > > > > +  machine_mode mask_mode;
> > >> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> > >> >> > > > > +  int size1, size2;
> > >> >> > > > > +  if (VECTOR_MODE_P (vmode)
> > >> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > >> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > >> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > >> >> > > > > +      && size1 != size2)
> > >> >> > > >
> > >> >> > > > Why do we check for equal size rather than just mode equality which
> > >> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > >> >> > > not QImode, Changed the patch to check mode equality.
> > >> >> > > Update patch.
> > >> >> >
> > >> >> > Looking at all this it seems the match.pd patterns should have not
> > >> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> > >> >> > which is equivalent here because we're always working with vector modes?
> > >>
> > >> Yeah, looks like it.
> > >>
> > >> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> > >> >> > the expectation rather than going around via the target hook which may not have
> > >> >> > enough context to decide which mask mode to use?
> > >> >> How about this?
> > >> >>
> > >> >> +/* Return true if target supports cond_op with data TYPE and
> > >> >> +   mask MASK_TYPE.  */
> > >> >> +bool
> > >> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> > >> >> +       tree mask_type)
> > >> >> +{
> > >> >> +  tree_pair types = tree_pair (type, type);
> > >> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> > >> >> +  machine_mode vmode = TYPE_MODE (type);
> > >> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> > >> >> +  if (icode == CODE_FOR_nothing)
> > >> >> +    return false;
> > >> >> +
> > >> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> > >> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> > >> >> +  return insn_data[icode].operand[0].mode == vmode
> > >> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> > >> >> +}
> > >> >> +
> > >> >
> > >> > Yeah, sth like that, though the operand[0].mode test should be
> > >> > redudnant.  I think we should assert or have a whiltelist
> > >> > for the internal function we support to be queried this way.
> > >> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > >> > classification used in internal-fn.def, that would be best.
> > >> >
> > >> > Richard, what are your thoughts about all this?
> > >>
> > >> IMO using get_mask_mode was right.  The optab documentation says:
> > >>
> > >>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
> > >>   integer if @var{m} is scalar, otherwise it has the mode returned by
> > >>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
> > >>
> > >> Allowing targets to use optabs to enforce different mask modes for
> > >> different operations would open up a mess of combinations.
> > >>
> > >> In other words, I think cond_vectorized_internal_fn_supported_p
> > >> is really testing two things:
> > >>
> > >> (1) is the mask type/vector type combination well-formed?
> > >> (2) is the internal function supported for the vector type?
> > >>
> > >> where (1) is a gimple question and (2) is a target question.
> > >>
> > >> I guess there's an argument that (1) should be part of the match.pd
> > >> condition instead, alongside the element_precision check.  That would
> > >> add to the cut-&-paste though. :-(
> > >>
> > >> Alternatively, I guess we would add:
> > >>
> > >>   bool is_truth_type_for (tree type, tree truth_type);
> > >>
> > >> to return true if truth_type is equal to truth_type_for (type)
> > >> (but without having to call truth_type_for).  We could then use:
> > >>
> > >>   is_truth_type_for (op_type, TREE_TYPE (@0))
> > >>
> > >> instead of:
> > >>
> > >>   element_precision (type) == element_precision (op_type)
> > >>
> > >> since it should be a strictly stronger condition.
> > > Thanks for your advice, it sounds more reasonable.
> > > Here is the updated patch.
> > >>
> > >> Thanks,
> > >> Richard
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
> > >
> > > From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> > > From: liuhongt <hongtao.liu@intel.com>
> > > Date: Thu, 2 Sep 2021 13:05:54 +0800
> > > Subject: [PATCH v4] Check mask type when doing cond_op related gimple
> > >  simplification.
> > >
> > > gcc/ChangeLog:
> > >
> > >       PR middle-end/102080
> > >       * match.pd: Check mask type when doing cond_op related gimple
> > >       simplification.
> > >       * tree.c (is_truth_type_for): New function.
> > >       * tree.h (is_truth_type_for): New declaration.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       PR middle-end/102080
> > >       * gcc.target/i386/pr102080.c: New test.
> > > ---
> > >  gcc/match.pd                             |  8 ++++----
> > >  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
> > >  gcc/tree.c                               |  7 +++++++
> > >  gcc/tree.h                               |  1 +
> > >  4 files changed, 31 insertions(+), 4 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index f421c74b62c..c9a27f46ed2 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > >    (with { tree op_type = TREE_TYPE (@4); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >
> > Why pass “type” rather than “op_type”?  “type” is the type of the
Sorry for the typo, changed.
> > original expression, and if the original vec_cond is well-formed,
> > @0 should already have the right truth type for “type”.  Here we're
> > trying to convert the expression into a conditional operation on
> > “op_type”, so we need to test whether that's valid.
> >
> > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > >    (with { tree op_type = TREE_TYPE (@4); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > >
> > >  /* Same for ternary operations.  */
> > > @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > >    (with { tree op_type = TREE_TYPE (@5); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > >    (with { tree op_type = TREE_TYPE (@5); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > >                 (view_convert:op_type @1)))))))
> > >  #endif
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > new file mode 100644
> > > index 00000000000..4c5ee32ee63
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > @@ -0,0 +1,19 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include<immintrin.h>
> > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > +
> > > +void
> > > +__attribute__ ((target("avx")))
> > > +IfThenElse (__m256 no) {
> > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > +}
> > > +void
> > > +__attribute__ ((target("avx512vl")))
> > > +EncodedFromDisplay() {
> > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > +  IfThenElse(__trans_tmp_11);
> > > +}
> > > diff --git a/gcc/tree.c b/gcc/tree.c
> > > index cba3bca41b3..88c2221eabb 100644
> > > --- a/gcc/tree.c
> > > +++ b/gcc/tree.c
> > > @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
> > >    return signed_or_unsigned_type_for (0, type);
> > >  }
> > >
> > > +bool
> > > +is_truth_type_for (tree type, tree truth_type)
> > > +{
> > > +  tree tmp = truth_type_for (type);
> > > +  return tmp == truth_type;
> > > +}
> >
> > The idea was to try to avoid calling truth_type to create a type.
> > Instead we can use similar logic to truth_type to tell whether type
> > has the right form.  I think the rules are:
> >
> > - For VECTOR_TYPEs:
> >   - The truth type must be a VECTOR_BOOLEAN_TYPE.
> >   - The number of elements must match (known_eq).
> >   - Also:
> >     - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
> >       exists, the truth type must have that mode.
> >     - Otherwise, the types must have the same size.
>
> For vector types it's
>
>   VECTOR_BOOLEAN_TYPE_P (truth_type)
>   && known_eq (number of elements)
>   && targetm.vectorize.get_mask_mode (TYPE_MODE (type)) == TYPE_MODE
> (truth_type)
>
> I think we're only interested in the case that TYPE_MODE (type) is a
> vector mode given
> we're doing optab checks.  If we want to cover !VECTOR_MODE vector type 'type'
> then it would be known_eq (component sizes) I think.
>
> Not sure if we need to test for !TYPE_UNSIGNED (truth_type).
>
> > - Otherwise, the truth type must be a BOOLEAN_TYPE.
>
> I'm not sure which case you're refering to here - when 'type' is a scalar type?
> I suppose we'd want to allow all types that trivially convert to
> boolean_type_node
Update patch.
> then which means useless_type_conversion_p (boolean_type_node, truth_type).
>
> > (Richi please correct me if I'm wrong.)
> >
> > Thanks,
> > Richard
> >
> >
> > > +
> > >  /* If TYPE is a vector type, return a signed integer vector type with the
> > >     same width and number of subparts. Otherwise return boolean_type_node.  */
> > >
> > > diff --git a/gcc/tree.h b/gcc/tree.h
> > > index 060a41f6991..c8542bfd476 100644
> > > --- a/gcc/tree.h
> > > +++ b/gcc/tree.h
> > > @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
> > >  extern tree signed_or_unsigned_type_for (int, tree);
> > >  extern tree signed_type_for (tree);
> > >  extern tree unsigned_type_for (tree);
> > > +bool is_truth_type_for (tree, tree);
> > >  extern tree truth_type_for (tree);
> > >  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
> > >  extern tree build_pointer_type (tree);



-- 
BR,
Hongtao

[-- Attachment #2: v5-0001-Check-mask-type-when-doing-cond_op-related-gimple.patch --]
[-- Type: text/x-patch, Size: 5320 bytes --]

From 5015f89a085eeaba2e2304a37794dafdc0c455be Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 2 Sep 2021 13:05:54 +0800
Subject: [PATCH v5] Check mask type when doing cond_op related gimple
 simplification.

gcc/ChangeLog:

	PR middle-end/102080
	* match.pd: Check mask type when doing cond_op related gimple
	simplification.
	* tree.c (is_truth_type_for): New function.
	* tree.h (is_truth_type_for): New declaration.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/match.pd                             |  8 +++----
 gcc/testsuite/gcc.target/i386/pr102080.c | 19 ++++++++++++++++
 gcc/tree.c                               | 29 ++++++++++++++++++++++++
 gcc/tree.h                               |  1 +
 4 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/match.pd b/gcc/match.pd
index f920bc4b7c1..f76b4a7b793 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7003,13 +7003,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
 /* Same for ternary operations.  */
@@ -7019,13 +7019,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..4c5ee32ee63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
diff --git a/gcc/tree.c b/gcc/tree.c
index a89d50a30ad..5aa5413fcb9 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10723,6 +10723,35 @@ signed_type_for (tree type)
   return signed_or_unsigned_type_for (0, type);
 }
 
+/* - For VECTOR_TYPEs:
+    - The truth type must be a VECTOR_BOOLEAN_TYPE.
+    - The number of elements must match (known_eq).
+    - targetm.vectorize.get_mask_mode exists, and exactly
+      the same mode as the truth type.
+   - Otherwise, the truth type must be a BOOLEAN_TYPE
+     or useless_type_conversion_p to BOOLEAN_TYPE.  */
+bool
+is_truth_type_for (tree type, tree truth_type)
+{
+  machine_mode mask_mode = TYPE_MODE (truth_type);
+  machine_mode vmode = TYPE_MODE (type);
+  machine_mode tmask_mode;
+
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      if (VECTOR_BOOLEAN_TYPE_P (truth_type)
+	  && known_eq (TYPE_VECTOR_SUBPARTS (type),
+		       TYPE_VECTOR_SUBPARTS (truth_type))
+	  && targetm.vectorize.get_mask_mode (vmode).exists (&tmask_mode)
+	  && tmask_mode == mask_mode)
+	return true;
+
+      return false;
+    }
+
+  return useless_type_conversion_p (boolean_type_node, truth_type);
+}
+
 /* If TYPE is a vector type, return a signed integer vector type with the
    same width and number of subparts. Otherwise return boolean_type_node.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 1559fe060a0..828025b5583 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4583,6 +4583,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
 extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
+bool is_truth_type_for (tree, tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] Check mask type when doing cond_op related gimple simplification.
  2021-09-06  9:01                   ` Hongtao Liu
@ 2021-09-16  6:27                     ` liuhongt
  2021-09-16  8:27                       ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: liuhongt @ 2021-09-16  6:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.guenther, richard.sandiford

Ping.

  Bootstrapped and regtest on x86_64-linux-gnu{-m32,}, aarch64-unknown-linux-gnu{-m32,}
  Ok for trunk?

gcc/ChangeLog:

	PR middle-end/102080
	* match.pd: Check mask type when doing cond_op related gimple
	simplification.
	* tree.c (is_truth_type_for): New function.
	* tree.h (is_truth_type_for): New declaration.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/match.pd                             |  8 +++----
 gcc/testsuite/gcc.target/i386/pr102080.c | 19 ++++++++++++++++
 gcc/tree.c                               | 29 ++++++++++++++++++++++++
 gcc/tree.h                               |  1 +
 4 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 008f7758c96..41f9e6d97f0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7020,13 +7020,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
   (with { tree op_type = TREE_TYPE (@4); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
 /* Same for ternary operations.  */
@@ -7036,13 +7036,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
   (with { tree op_type = TREE_TYPE (@5); }
    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
-	&& element_precision (type) == element_precision (op_type))
+	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..4c5ee32ee63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}
diff --git a/gcc/tree.c b/gcc/tree.c
index 3d15948fd1a..994775d7314 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10737,6 +10737,35 @@ signed_type_for (tree type)
   return signed_or_unsigned_type_for (0, type);
 }
 
+/* - For VECTOR_TYPEs:
+    - The truth type must be a VECTOR_BOOLEAN_TYPE.
+    - The number of elements must match (known_eq).
+    - targetm.vectorize.get_mask_mode exists, and exactly
+      the same mode as the truth type.
+   - Otherwise, the truth type must be a BOOLEAN_TYPE
+     or useless_type_conversion_p to BOOLEAN_TYPE.  */
+bool
+is_truth_type_for (tree type, tree truth_type)
+{
+  machine_mode mask_mode = TYPE_MODE (truth_type);
+  machine_mode vmode = TYPE_MODE (type);
+  machine_mode tmask_mode;
+
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      if (VECTOR_BOOLEAN_TYPE_P (truth_type)
+	  && known_eq (TYPE_VECTOR_SUBPARTS (type),
+		       TYPE_VECTOR_SUBPARTS (truth_type))
+	  && targetm.vectorize.get_mask_mode (vmode).exists (&tmask_mode)
+	  && tmask_mode == mask_mode)
+	return true;
+
+      return false;
+    }
+
+  return useless_type_conversion_p (boolean_type_node, truth_type);
+}
+
 /* If TYPE is a vector type, return a signed integer vector type with the
    same width and number of subparts. Otherwise return boolean_type_node.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 7274ba75f59..06c027da450 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4591,6 +4591,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
 extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
+bool is_truth_type_for (tree, tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Check mask type when doing cond_op related gimple simplification.
  2021-09-16  6:27                     ` [PATCH] Check mask type when doing cond_op related gimple simplification liuhongt
@ 2021-09-16  8:27                       ` Richard Biener
  0 siblings, 0 replies; 14+ messages in thread
From: Richard Biener @ 2021-09-16  8:27 UTC (permalink / raw)
  To: liuhongt; +Cc: GCC Patches, Richard Sandiford

On Thu, Sep 16, 2021 at 8:27 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Ping.
>
>   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}, aarch64-unknown-linux-gnu{-m32,}
>   Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR middle-end/102080
>         * match.pd: Check mask type when doing cond_op related gimple
>         simplification.
>         * tree.c (is_truth_type_for): New function.
>         * tree.h (is_truth_type_for): New declaration.
>
> gcc/testsuite/ChangeLog:
>
>         PR middle-end/102080
>         * gcc.target/i386/pr102080.c: New test.
> ---
>  gcc/match.pd                             |  8 +++----
>  gcc/testsuite/gcc.target/i386/pr102080.c | 19 ++++++++++++++++
>  gcc/tree.c                               | 29 ++++++++++++++++++++++++
>  gcc/tree.h                               |  1 +
>  4 files changed, 53 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 008f7758c96..41f9e6d97f0 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -7020,13 +7020,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -       && element_precision (type) == element_precision (op_type))
> +       && is_truth_type_for (op_type, TREE_TYPE (@0)))
>      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -       && element_precision (type) == element_precision (op_type))
> +       && is_truth_type_for (op_type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>
>  /* Same for ternary operations.  */
> @@ -7036,13 +7036,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -       && element_precision (type) == element_precision (op_type))
> +       && is_truth_type_for (op_type, TREE_TYPE (@0)))
>      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -       && element_precision (type) == element_precision (op_type))
> +       && is_truth_type_for (op_type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 @4
>                   (view_convert:op_type @1)))))))
>  #endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> new file mode 100644
> index 00000000000..4c5ee32ee63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include<immintrin.h>
> +typedef float __m256 __attribute__((__vector_size__(32)));
> +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> +
> +void
> +__attribute__ ((target("avx")))
> +IfThenElse (__m256 no) {
> +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> +}
> +void
> +__attribute__ ((target("avx512vl")))
> +EncodedFromDisplay() {
> +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> +  IfThenElse(__trans_tmp_11);
> +}
> diff --git a/gcc/tree.c b/gcc/tree.c
> index 3d15948fd1a..994775d7314 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -10737,6 +10737,35 @@ signed_type_for (tree type)
>    return signed_or_unsigned_type_for (0, type);
>  }
>
> +/* - For VECTOR_TYPEs:
> +    - The truth type must be a VECTOR_BOOLEAN_TYPE.
> +    - The number of elements must match (known_eq).
> +    - targetm.vectorize.get_mask_mode exists, and exactly
> +      the same mode as the truth type.
> +   - Otherwise, the truth type must be a BOOLEAN_TYPE
> +     or useless_type_conversion_p to BOOLEAN_TYPE.  */
> +bool
> +is_truth_type_for (tree type, tree truth_type)
> +{
> +  machine_mode mask_mode = TYPE_MODE (truth_type);
> +  machine_mode vmode = TYPE_MODE (type);
> +  machine_mode tmask_mode;
> +
> +  if (TREE_CODE (type) == VECTOR_TYPE)
> +    {
> +      if (VECTOR_BOOLEAN_TYPE_P (truth_type)
> +         && known_eq (TYPE_VECTOR_SUBPARTS (type),
> +                      TYPE_VECTOR_SUBPARTS (truth_type))
> +         && targetm.vectorize.get_mask_mode (vmode).exists (&tmask_mode)
> +         && tmask_mode == mask_mode)
> +       return true;
> +
> +      return false;
> +    }
> +
> +  return useless_type_conversion_p (boolean_type_node, truth_type);
> +}
> +
>  /* If TYPE is a vector type, return a signed integer vector type with the
>     same width and number of subparts. Otherwise return boolean_type_node.  */
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 7274ba75f59..06c027da450 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4591,6 +4591,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
>  extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
> +bool is_truth_type_for (tree, tree);

just for style add 'extern' to follow surrounding code.

The patch looks OK to me with that change.

Thanks,
Richard.

>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-09-16  8:27 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-27  6:52 [PATCH] Check the type of mask while generating cond_op in gimple simplication liuhongt
2021-08-30 12:24 ` Richard Biener
2021-08-31 10:24   ` Hongtao Liu
2021-08-31 11:56     ` Richard Biener
2021-09-01  6:33       ` Hongtao Liu
2021-09-01 11:14         ` Richard Biener
2021-09-01 12:52           ` Richard Sandiford
2021-09-01 13:33             ` Richard Biener
2021-09-02  6:42             ` Hongtao Liu
2021-09-02 17:54               ` Richard Sandiford
2021-09-06  6:54                 ` Richard Biener
2021-09-06  9:01                   ` Hongtao Liu
2021-09-16  6:27                     ` [PATCH] Check mask type when doing cond_op related gimple simplification liuhongt
2021-09-16  8:27                       ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).