public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize A < B ? A : B to MIN_EXPR.
@ 2023-12-19  5:38 liuhongt
  2023-12-19 12:48 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: liuhongt @ 2023-12-19  5:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

Similar for A < B ? B : A to MAX_EXPR.
There is code in the frontend to optimize such patterns, but it fails to
handle the testcase in the PR since the pattern is only exposed at the
gimple level, when folding backend builtins.

pr95906 can now be optimized to MAX_EXPR, as noted in the FIXME comment
in the testcase.

// FIXME: this should further optimize to a MAX_EXPR
 typedef signed char v16i8 __attribute__((vector_size(16)));
 v16i8 f(v16i8 a, v16i8 b)


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk? (or maybe wait for GCC 15).

gcc/ChangeLog:

	PR target/104401
	* match.pd (A < B ? A : B -> MIN_EXPR): New pattern match.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr104401.c: New test.
	* gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
---
 gcc/match.pd                             | 20 ++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
 gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c

diff --git a/gcc/match.pd b/gcc/match.pd
index d57e29bfe1d..9584a70aa3d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5263,6 +5263,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (view_convert:type
        (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3)))))))
 
+/* Optimize A < B ? A : B to MIN (A, B)
+	    A > B ? A : B to MAX (A, B).  */
+(for cmp (lt le gt ge)
+     minmax (min min max max)
+     MINMAX (MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR)
+ (simplify
+  (vec_cond (cmp @0 @1) @0 @1)
+   (if (VECTOR_INTEGER_TYPE_P (type)
+       && target_supports_op_p (type, MINMAX, optab_vector))
+    (minmax @0 @1))))
+
+(for cmp (lt le gt ge)
+     minmax (max max min min)
+     MINMAX (MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR)
+ (simplify
+  (vec_cond (cmp @0 @1) @1 @0)
+   (if (VECTOR_INTEGER_TYPE_P (type)
+       && target_supports_op_p (type, MINMAX, optab_vector))
+    (minmax @0 @1))))
+
 /* c1 ? c2 ? a : b : b  -->  (c1 & c2) ? a : b  */
 (simplify
  (vec_cond @0 (vec_cond:s @1 @2 @3) @3)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
index 3d820a58e93..d15670f3e9e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
 
-// FIXME: this should further optimize to a MAX_EXPR
 typedef signed char v16i8 __attribute__((vector_size(16)));
 v16i8 f(v16i8 a, v16i8 b)
 {
@@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
 }
 
 /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
-/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
+/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c
new file mode 100644
index 00000000000..8ce7ff88d9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104401.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler-times "pminsd" 2 } } */
+/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
+
+#include <smmintrin.h>
+
+__m128i min32(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
+}
+
+__m128i max32(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
+}
+
+__m128i min32_1(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
+}
+
+__m128i max32_1(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
+}
+
-- 
2.31.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Optimize A < B ? A : B to MIN_EXPR.
  2023-12-19  5:38 [PATCH] Optimize A < B ? A : B to MIN_EXPR liuhongt
@ 2023-12-19 12:48 ` Richard Biener
  2024-01-09 10:46   ` liuhongt
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2023-12-19 12:48 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Tue, Dec 19, 2023 at 6:39 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Similar for A < B ? B : A to MAX_EXPR.
> There're codes in the frontend to optimize such pattern but failed to
> handle testcase in the PR since it's exposed at gimple level when
> folding backend builtins.
>
> pr95906 now can be optimized to MAX_EXPR as it's commented in the
> testcase.
>
> // FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk? (or maybe wait for GCC 15).

I wonder if you can amend the existing patterns instead by iterating
over cond/vec_cond.  There are quite a few (look for uses of
minmax_from_comparison) that could be adapted to vectors.

The ones matching the simple form you match are

#if GIMPLE
/* A >= B ? A : B -> max (A, B) and friends.  The code is still
   in fold_cond_expr_with_comparison for GENERIC folding with
   some extra constraints.  */
(for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
 (simplify
  (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
        (convert3? @0) (convert4? @1))
  (if (!HONOR_SIGNED_ZEROS (type)
...

I think.  Consider at least placing the new patterns next to that.

> gcc/ChangeLog:
>
>         PR target/104401
>         * match.pd (A < B ? A : B -> MIN_EXPR): New patten match.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr104401.c: New test.
>         * gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
> ---
>  gcc/match.pd                             | 20 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
>  gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
>  3 files changed, 48 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d57e29bfe1d..9584a70aa3d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5263,6 +5263,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       (view_convert:type
>         (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3)))))))
>
> +/* Optimize A < B ? A : B to MIN (A, B)
> +           A > B ? A : B to MAX (A, B).  */
> +(for cmp (lt le gt ge)
> +     minmax (min min max max)
> +     MINMAX (MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @0 @1)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
> +(for cmp (lt le gt ge)
> +     minmax (max max min min)
> +     MINMAX (MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @1 @0)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
>  /* c1 ? c2 ? a : b : b  -->  (c1 & c2) ? a : b  */
>  (simplify
>   (vec_cond @0 (vec_cond:s @1 @2 @3) @3)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> index 3d820a58e93..d15670f3e9e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
>
> -// FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>  {
> @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
>  }
>
>  /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
> -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
> +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c
> new file mode 100644
> index 00000000000..8ce7ff88d9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104401.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler-times "pminsd" 2 } } */
> +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
> +
> +#include <smmintrin.h>
> +
> +__m128i min32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
> +}
> +
> +__m128i max32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
> +}
> +
> +__m128i min32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
> +}
> +
> +__m128i max32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
> +}
> +
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] Optimize A < B ? A : B to MIN_EXPR.
  2023-12-19 12:48 ` Richard Biener
@ 2024-01-09 10:46   ` liuhongt
  2024-01-10 12:01     ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: liuhongt @ 2024-01-09 10:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.guenther

> I wonder if you can amend the existing patterns instead by iterating
> over cond/vec_cond.  There are quite some (look for uses of
> minmax_from_comparison) that could be adapted to vectors.
>
> The ones matching the simple form you match are
>
> #if GIMPLE
> /* A >= B ? A : B -> max (A, B) and friends.  The code is still
>    in fold_cond_expr_with_comparison for GENERIC folding with
>    some extra constraints.  */
> (for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
>  (simplify
>   (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
>         (convert3? @0) (convert4? @1))
>   (if (!HONOR_SIGNED_ZEROS (type)
> ...
This pattern is a conditional operation that treats a vector as a complete
unit; it's more like cbranchm, which is different from vec_cond_expr.
So I added my patterns after it.
>
> I think.  Consider at least placing the new patterns next to that.


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

Similar for A < B ? B : A to MAX_EXPR.
There is code in the frontend to optimize such patterns, but it fails to
handle the testcase in the PR since the pattern is only exposed at the
gimple level, when folding backend builtins.

pr95906 can now be optimized to MAX_EXPR, as noted in the FIXME comment
in the testcase.

// FIXME: this should further optimize to a MAX_EXPR
 typedef signed char v16i8 __attribute__((vector_size(16)));
 v16i8 f(v16i8 a, v16i8 b)

gcc/ChangeLog:

	PR target/104401
	* match.pd (VEC_COND_EXPR: A < B ? A : B -> MIN_EXPR): New pattern match.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr104401.c: New test.
	* gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
---
 gcc/match.pd                             | 21 ++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
 gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 7b4b15acc41..d8e2009a83f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5672,6 +5672,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (if (VECTOR_TYPE_P (type))
        (view_convert @c0)
        (convert @c0))))))))
+
+/* This is for VEC_COND_EXPR
+   Optimize A < B ? A : B to MIN (A, B)
+	    A > B ? A : B to MAX (A, B).  */
+(for cmp (lt le ungt unge gt ge unlt unle)
+     minmax (min min min min max max max max)
+     MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR)
+ (simplify
+  (vec_cond (cmp @0 @1) @0 @1)
+   (if (VECTOR_INTEGER_TYPE_P (type)
+       && target_supports_op_p (type, MINMAX, optab_vector))
+    (minmax @0 @1))))
+
+(for cmp (lt le ungt unge gt ge unlt unle)
+     minmax (max max max max min min min min)
+     MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR)
+ (simplify
+  (vec_cond (cmp @0 @1) @1 @0)
+   (if (VECTOR_INTEGER_TYPE_P (type)
+       && target_supports_op_p (type, MINMAX, optab_vector))
+    (minmax @0 @1))))
 #endif
 
 (for cnd (cond vec_cond)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
index 3d820a58e93..d15670f3e9e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
 
-// FIXME: this should further optimize to a MAX_EXPR
 typedef signed char v16i8 __attribute__((vector_size(16)));
 v16i8 f(v16i8 a, v16i8 b)
 {
@@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
 }
 
 /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
-/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
+/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c
new file mode 100644
index 00000000000..8ce7ff88d9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104401.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler-times "pminsd" 2 } } */
+/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
+
+#include <smmintrin.h>
+
+__m128i min32(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
+}
+
+__m128i max32(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
+}
+
+__m128i min32_1(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
+}
+
+__m128i max32_1(__m128i value, __m128i input)
+{
+  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
+}
+
-- 
2.31.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Optimize A < B ? A : B to MIN_EXPR.
  2024-01-09 10:46   ` liuhongt
@ 2024-01-10 12:01     ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2024-01-10 12:01 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Tue, Jan 9, 2024 at 11:48 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> > I wonder if you can amend the existing patterns instead by iterating
> > over cond/vec_cond.  There are quite some (look for uses of
> > minmax_from_comparison) that could be adapted to vectors.
> >
> > The ones matching the simple form you match are
> >
> > #if GIMPLE
> > /* A >= B ? A : B -> max (A, B) and friends.  The code is still
> >    in fold_cond_expr_with_comparison for GENERIC folding with
> >    some extra constraints.  */
> > (for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
> >  (simplify
> >   (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
> >         (convert3? @0) (convert4? @1))
> >   (if (!HONOR_SIGNED_ZEROS (type)
> > ...
> This pattern is a conditional operation that treats a vector as a complete
> unit, it's more like cbranchm which is different from vec_cond_expr.
> So I add my patterns after this.
> >
> > I think.  Consider at least placing the new patterns next to that.
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

OK.

Richard.

> Similar for A < B ? B : A to MAX_EXPR.
> There're codes in the frontend to optimize such pattern but failed to
> handle testcase in the PR since it's exposed at gimple level when
> folding backend builtins.
>
> pr95906 now can be optimized to MAX_EXPR as it's commented in the
> testcase.
>
> // FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>
> gcc/ChangeLog:
>
>         PR target/104401
>         * match.pd (VEC_COND_EXPR: A < B ? A : B -> MIN_EXPR): New patten match.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr104401.c: New test.
>         * gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
> ---
>  gcc/match.pd                             | 21 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
>  gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
>  3 files changed, 49 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7b4b15acc41..d8e2009a83f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5672,6 +5672,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        (if (VECTOR_TYPE_P (type))
>         (view_convert @c0)
>         (convert @c0))))))))
> +
> +/* This is for VEC_COND_EXPR
> +   Optimize A < B ? A : B to MIN (A, B)
> +           A > B ? A : B to MAX (A, B).  */
> +(for cmp (lt le ungt unge gt ge unlt unle)
> +     minmax (min min min min max max max max)
> +     MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @0 @1)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
> +(for cmp (lt le ungt unge gt ge unlt unle)
> +     minmax (max max max max min min min min)
> +     MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @1 @0)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
>  #endif
>
>  (for cnd (cond vec_cond)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> index 3d820a58e93..d15670f3e9e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
>
> -// FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>  {
> @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
>  }
>
>  /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
> -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
> +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c
> new file mode 100644
> index 00000000000..8ce7ff88d9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104401.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler-times "pminsd" 2 } } */
> +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
> +
> +#include <smmintrin.h>
> +
> +__m128i min32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
> +}
> +
> +__m128i max32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
> +}
> +
> +__m128i min32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
> +}
> +
> +__m128i max32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
> +}
> +
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-01-10 12:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-19  5:38 [PATCH] Optimize A < B ? A : B to MIN_EXPR liuhongt
2023-12-19 12:48 ` Richard Biener
2024-01-09 10:46   ` liuhongt
2024-01-10 12:01     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).