public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
@ 2024-06-26  1:45 pan2.li
  2024-06-26 13:51 ` Richard Biener
  2024-06-27  5:12 ` [PATCH v2] " pan2.li
  0 siblings, 2 replies; 6+ messages in thread
From: pan2.li @ 2024-06-26  1:45 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, richard.guenther, jeffreyalaw,
	rdapp.gcc, Pan Li

From: Pan Li <pan2.li@intel.com>

This patch adds the middle-end representation for saturating
truncation, i.e. the truncated result is set to the max value of the
narrow type when the input overflows it.  It matches patterns similar
to the one below.

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
  NT __attribute__((noinline))         \
  sat_u_truc_##T##_fmt_1 (WT x)        \
  {                                    \
    bool overflow = x > (WT)(NT)(-1);  \
    return ((NT)x) | (NT)-overflow;    \
  }

For example, truncated uint16_t to uint8_t, we have

* SAT_TRUNC (254)   => 254
* SAT_TRUNC (255)   => 255
* SAT_TRUNC (256)   => 255
* SAT_TRUNC (65535) => 255

Given below SAT_TRUNC from uint64_t to uint32_t.

DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)

Before this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  _Bool overflow;
  unsigned int _1;
  unsigned int _2;
  unsigned int _3;
  uint32_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  overflow_5 = x_4(D) > 4294967295;
  _1 = (unsigned int) x_4(D);
  _2 = (unsigned int) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;    succ:       EXIT

}

After this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  uint32_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;    succ:       EXIT

}

The following tests passed with this patch:
*. The rv64gcv full regression tests.
*. The rv64gcv build with glibc.
*. The x86 bootstrap tests.
*. The x86 full regression tests.

gcc/ChangeLog:

	* internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
	unary_convert.
	* match.pd: Add new matching pattern for unsigned int sat_trunc.
	* optabs.def (OPTAB_CL): Add unsigned and signed optab.
	* tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add
	new decl for the matching pattern generated func.
	(match_unsigned_saturation_trunc): Add new func impl to match
	the .SAT_TRUNC.
	(math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
	function under BIT_IOR_EXPR case.
	* tree.cc (integer_half_truncated_all_ones_p): Add new func impl
	to filter the truncated threshold.
	* tree.h (integer_half_truncated_all_ones_p): Add new func decl.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/internal-fn.def       |  2 ++
 gcc/match.pd              | 12 +++++++++++-
 gcc/optabs.def            |  3 +++
 gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
 gcc/tree.cc               | 22 ++++++++++++++++++++++
 gcc/tree.h                |  6 ++++++
 6 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a8c83437ada..915d329c05a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 3d0689c9312..d4062434cc7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -39,7 +39,8 @@ along with GCC; see the file COPYING3.  If not see
    HONOR_NANS
    uniform_vector_p
    expand_vec_cmp_expr_p
-   bitmask_inv_cst_vector_p)
+   bitmask_inv_cst_vector_p
+   integer_half_truncated_all_ones_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -3210,6 +3211,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Unsigned saturation truncate, case 1, prec (WT) == 2 * prec (NT).
+   SAT_U_TRUNC = (NT)x | (NT)(-(x > (WT)(NT)(-1))).  */
+(match (unsigend_integer_sat_trunc @0)
+ (bit_ior:c (negate (convert (gt @0 integer_half_truncated_all_ones_p)))
+   (convert @0))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && TYPE_PRECISION (type) * 2 == TYPE_PRECISION (TREE_TYPE (@0)))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index bc2611abdc2..4eaffe96c19 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
 OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
 OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
 
+OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", NULL)
+OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", NULL)
+
 OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
 OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
 
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 57085488722..64bc70c29b3 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigend_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
@@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
 					ops[0], ops[1]);
 }
 
+/*
+ * Try to match unsigned saturation truncation rooted at STMT, e.g.
+ * uint16_t x_4(D);
+ * uint8_t _6;
+ * overflow_5 = x_4(D) > 255;
+ * _1 = (unsigned char) x_4(D);
+ * _2 = (unsigned char) overflow_5;
+ * _3 = -_2;
+ * _6 = _1 | _3;
+ * =>
+ * _6 = .SAT_TRUNC (x_4(D));
+ * */
+static void
+match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+  tree ops[1];
+  tree lhs = gimple_assign_lhs (stmt);
+  tree type = TREE_TYPE (lhs);
+
+  if (gimple_unsigend_integer_sat_trunc (lhs, ops, NULL)
+    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
+				       tree_pair (type, TREE_TYPE (ops[0])),
+				       OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, /* update_eh_info */ true);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 
 	    case BIT_IOR_EXPR:
 	      match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
+	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 2d2d5b6db6e..4572e6fc42b 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -2944,6 +2944,28 @@ integer_all_onesp (const_tree expr)
 	  == wi::to_wide (expr));
 }
 
+/* Return true if EXPR is an integer constant whose low half (in
+   precision) is all ones and whose high half is zero.  For example:
+   uint16_t a = 255;   // true.
+   uint16_t b = 0;     // false.
+   uint16_t c = 65535; // false.  */
+bool
+integer_half_truncated_all_ones_p (const_tree expr)
+{
+  if (TREE_CODE (expr) != INTEGER_CST)
+    return false;
+
+  unsigned precision = TYPE_PRECISION (TREE_TYPE (expr));
+  unsigned trunc_prec = precision / 2;
+
+  /* wi::mask works at any precision, unlike a shifted uint64_t; a type too
+     narrow to have a lower half (TRUNC_PREC == 0) never matches.  */
+  wide_int trunc_max = wi::mask (trunc_prec, false, precision);
+  wide_int expr_int = wi::to_wide (expr, precision);
+
+  return trunc_prec != 0 && trunc_max == expr_int;
+}
+
 /* Return true if EXPR is the integer constant minus one, or a location
    wrapper for such a constant.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 28e8e71b036..0237826dd23 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5205,6 +5205,12 @@ extern bool integer_each_onep (const_tree);
 
 extern bool integer_all_onesp (const_tree);
 
+/* integer_half_truncated_all_ones_p (tree x) will return true if x is
+   the integer constant that the half truncated bits are all 1.
+   For example, uint16_t type with 255 constant integer will be true.  */
+
+extern bool integer_half_truncated_all_ones_p (const_tree expr);
+
 /* integer_minus_onep (tree x) is nonzero if X is an integer constant of
    value -1.  */
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
  2024-06-26  1:45 [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int pan2.li
@ 2024-06-26 13:51 ` Richard Biener
  2024-06-26 14:18   ` Li, Pan2
  2024-06-27  5:12 ` [PATCH v2] " pan2.li
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Biener @ 2024-06-26 13:51 UTC (permalink / raw)
  To: pan2.li; +Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc

On Wed, Jun 26, 2024 at 3:46 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation truncation.  Aka set the result of truncated value to
> the max value when overflow.  It will take the pattern similar
> as below.
>
> Form 1:
>   #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
>   NT __attribute__((noinline))         \
>   sat_u_truc_##T##_fmt_1 (WT x)        \
>   {                                    \
>     bool overflow = x > (WT)(NT)(-1);  \
>     return ((NT)x) | (NT)-overflow;    \
>   }
>
> For example, truncated uint16_t to uint8_t, we have
>
> * SAT_TRUNC (254)   => 254
> * SAT_TRUNC (255)   => 255
> * SAT_TRUNC (256)   => 255
> * SAT_TRUNC (65536) => 255
>
> Given below SAT_TRUNC from uint64_t to uint32_t.
>
> DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)
>
> Before this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   _Bool overflow;
>   unsigned int _1;
>   unsigned int _2;
>   unsigned int _3;
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   overflow_5 = x_4(D) > 4294967295;
>   _1 = (unsigned int) x_4(D);
>   _2 = (unsigned int) overflow_5;
>   _3 = -_2;
>   _6 = _1 | _3;
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> The below tests are passed for this patch:
> *. The rv64gcv fully regression tests.
> *. The rv64gcv build with glibc.
> *. The x86 bootstrap tests.
> *. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
>         unary_convert.
>         * match.pd: Add new matching pattern for unsigned int sat_trunc.
>         * optabs.def (OPTAB_CL): Add unsigned and signed optab.
>         * tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add
>         new decl for the matching pattern generated func.
>         (match_unsigned_saturation_trunc): Add new func impl to match
>         the .SAT_TRUNC.
>         (math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
>         function under BIT_IOR_EXPR case.
>         * tree.cc (integer_half_truncated_all_ones_p): Add new func impl
>         to filter the truncated threshold.
>         * tree.h (integer_half_truncated_all_ones_p): Add new func decl.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 12 +++++++++++-
>  gcc/optabs.def            |  3 +++
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
>  gcc/tree.cc               | 22 ++++++++++++++++++++++
>  gcc/tree.h                |  6 ++++++
>  6 files changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..915d329c05a 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..d4062434cc7 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -39,7 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>     HONOR_NANS
>     uniform_vector_p
>     expand_vec_cmp_expr_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   integer_half_truncated_all_ones_p)
>
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -3210,6 +3211,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT).
> +   SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
> +(match (unsigend_integer_sat_trunc @0)

unsigned

> + (bit_ior:c (negate (convert (gt @0 integer_half_truncated_all_ones_p)))
> +   (convert @0))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))

This type size relation doesn't match integer_half_truncated_all_ones_p,
which works based on TYPE_PRECISION.  Don't you maybe want to scrap
integer_half_truncated_all_ones_p as too restrictive and instead verify
that TYPE_PRECISION (type) is less than the precision of @0 and that the
INTEGER_CST compared against matches 'type's precision mask?

> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..4eaffe96c19 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
>  OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
>  OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
>
> +OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", gen_satfract_conv_libfunc)
> +OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", gen_satfract_conv_libfunc)

Those libfuncs do not exist so use NULL for them.

> +
>  OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
>  OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
>
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 57085488722..64bc70c29b3 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigend_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> @@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
>                                         ops[0], ops[1]);
>  }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * uint16_t x_4(D);
> + * uint8_t _6;
> + * overflow_5 = x_4(D) > 255;
> + * _1 = (unsigned char) x_4(D);
> + * _2 = (unsigned char) overflow_5;
> + * _3 = -_2;
> + * _6 = _1 | _3;
> + * =>
> + * _6 = .SAT_TRUNC (x_4(D));
> + * */
> +static void
> +match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  tree ops[1];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree type = TREE_TYPE (lhs);
> +
> +  if (gimple_unsigend_integer_sat_trunc (lhs, ops, NULL)
> +    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> +                                      tree_pair (type, TREE_TYPE (ops[0])),
> +                                      OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>             case BIT_IOR_EXPR:
>               match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> diff --git a/gcc/tree.cc b/gcc/tree.cc
> index 2d2d5b6db6e..4572e6fc42b 100644
> --- a/gcc/tree.cc
> +++ b/gcc/tree.cc
> @@ -2944,6 +2944,28 @@ integer_all_onesp (const_tree expr)
>           == wi::to_wide (expr));
>  }
>
> +/* Return true if EXPR is an integer constant of all ones with half
> +   truncated in precision.  Or return false.  For example:
> +   uint16_t a = 255;   // true.
> +   uint16_t b = 0;     // false.
> +   uint16_t c = 65545; // false.  */
> +bool
> +integer_half_truncated_all_ones_p (const_tree expr)
> +{
> +  if (TREE_CODE (expr) != INTEGER_CST)
> +    return false;
> +
> +  unsigned precision = TYPE_PRECISION (TREE_TYPE (expr));
> +
> +  gcc_assert (precision <= 64);
> +
> +  unsigned trunc_prec = precision / 2;
> +  wide_int trunc_max = wi::uhwi ((uint64_t)-1 >> (64 - trunc_prec), precision);

There is wi::mask which doesn't suffer from being limited to max 64bits.

> +  wide_int expr_int = wi::to_wide (expr, precision);
> +
> +  return trunc_max == expr_int;
> +}
> +
>  /* Return true if EXPR is the integer constant minus one, or a location
>     wrapper for such a constant.  */
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 28e8e71b036..0237826dd23 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -5205,6 +5205,12 @@ extern bool integer_each_onep (const_tree);
>
>  extern bool integer_all_onesp (const_tree);
>
> +/* integer_half_truncated_all_ones_p (tree x) will return true if x is
> +   the integer constant that the half truncated bits are all 1.
> +   For example, uint16_t type with 255 constant integer will be true.  */
> +
> +extern bool integer_half_truncated_all_ones_p (const_tree expr);
> +
>  /* integer_minus_onep (tree x) is nonzero if X is an integer constant of
>     value -1.  */
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
  2024-06-26 13:51 ` Richard Biener
@ 2024-06-26 14:18   ` Li, Pan2
  0 siblings, 0 replies; 6+ messages in thread
From: Li, Pan2 @ 2024-06-26 14:18 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc

Thanks Richard, will address the comments in v2.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Wednesday, June 26, 2024 9:52 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int

On Wed, Jun 26, 2024 at 3:46 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation truncation.  Aka set the result of truncated value to
> the max value when overflow.  It will take the pattern similar
> as below.
>
> Form 1:
>   #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
>   NT __attribute__((noinline))         \
>   sat_u_truc_##T##_fmt_1 (WT x)        \
>   {                                    \
>     bool overflow = x > (WT)(NT)(-1);  \
>     return ((NT)x) | (NT)-overflow;    \
>   }
>
> For example, truncated uint16_t to uint8_t, we have
>
> * SAT_TRUNC (254)   => 254
> * SAT_TRUNC (255)   => 255
> * SAT_TRUNC (256)   => 255
> * SAT_TRUNC (65536) => 255
>
> Given below SAT_TRUNC from uint64_t to uint32_t.
>
> DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)
>
> Before this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   _Bool overflow;
>   unsigned int _1;
>   unsigned int _2;
>   unsigned int _3;
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   overflow_5 = x_4(D) > 4294967295;
>   _1 = (unsigned int) x_4(D);
>   _2 = (unsigned int) overflow_5;
>   _3 = -_2;
>   _6 = _1 | _3;
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> The below tests are passed for this patch:
> *. The rv64gcv fully regression tests.
> *. The rv64gcv build with glibc.
> *. The x86 bootstrap tests.
> *. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
>         unary_convert.
>         * match.pd: Add new matching pattern for unsigned int sat_trunc.
>         * optabs.def (OPTAB_CL): Add unsigned and signed optab.
>         * tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add
>         new decl for the matching pattern generated func.
>         (match_unsigned_saturation_trunc): Add new func impl to match
>         the .SAT_TRUNC.
>         (math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
>         function under BIT_IOR_EXPR case.
>         * tree.cc (integer_half_truncated_all_ones_p): Add new func impl
>         to filter the truncated threshold.
>         * tree.h (integer_half_truncated_all_ones_p): Add new func decl.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 12 +++++++++++-
>  gcc/optabs.def            |  3 +++
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
>  gcc/tree.cc               | 22 ++++++++++++++++++++++
>  gcc/tree.h                |  6 ++++++
>  6 files changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..915d329c05a 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..d4062434cc7 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -39,7 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>     HONOR_NANS
>     uniform_vector_p
>     expand_vec_cmp_expr_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   integer_half_truncated_all_ones_p)
>
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -3210,6 +3211,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT).
> +   SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
> +(match (unsigend_integer_sat_trunc @0)

unsigned

> + (bit_ior:c (negate (convert (gt @0 integer_half_truncated_all_ones_p)))
> +   (convert @0))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))

This type size relation doesn't match
integer_half_truncated_all_ones_p, that works
based on TYPE_PRECISION.  Don't you maybe want to scrap
integer_half_truncated_all_ones_p
as too restrictive and instead verify that TYPE_PRECISION (type) is
less than the
precision of @0 and that the INTEGER_CST compared against matches
'type's precision mask?

> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..4eaffe96c19 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
>  OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
>  OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
>
> +OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", gen_satfract_conv_libfunc)
> +OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", gen_satfract_conv_libfunc)

Those libfuncs do not exist so use NULL for them.

> +
>  OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
>  OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
>
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 57085488722..64bc70c29b3 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigend_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> @@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
>                                         ops[0], ops[1]);
>  }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * uint16_t x_4(D);
> + * uint8_t _6;
> + * overflow_5 = x_4(D) > 255;
> + * _1 = (unsigned char) x_4(D);
> + * _2 = (unsigned char) overflow_5;
> + * _3 = -_2;
> + * _6 = _1 | _3;
> + * =>
> + * _6 = .SAT_TRUNC (x_4(D));
> + * */
> +static void
> +match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  tree ops[1];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree type = TREE_TYPE (lhs);
> +
> +  if (gimple_unsigend_integer_sat_trunc (lhs, ops, NULL)
> +    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> +                                      tree_pair (type, TREE_TYPE (ops[0])),
> +                                      OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>             case BIT_IOR_EXPR:
>               match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> diff --git a/gcc/tree.cc b/gcc/tree.cc
> index 2d2d5b6db6e..4572e6fc42b 100644
> --- a/gcc/tree.cc
> +++ b/gcc/tree.cc
> @@ -2944,6 +2944,28 @@ integer_all_onesp (const_tree expr)
>           == wi::to_wide (expr));
>  }
>
> +/* Return true if EXPR is an integer constant of all ones with half
> +   truncated in precision.  Or return false.  For example:
> +   uint16_t a = 255;   // true.
> +   uint16_t b = 0;     // false.
> +   uint16_t c = 65545; // false.  */
> +bool
> +integer_half_truncated_all_ones_p (const_tree expr)
> +{
> +  if (TREE_CODE (expr) != INTEGER_CST)
> +    return false;
> +
> +  unsigned precision = TYPE_PRECISION (TREE_TYPE (expr));
> +
> +  gcc_assert (precision <= 64);
> +
> +  unsigned trunc_prec = precision / 2;
> +  wide_int trunc_max = wi::uhwi ((uint64_t)-1 >> (64 - trunc_prec), precision);

There is wi::mask which doesn't suffer from being limited to max 64bits.

> +  wide_int expr_int = wi::to_wide (expr, precision);
> +
> +  return trunc_max == expr_int;
> +}
> +
>  /* Return true if EXPR is the integer constant minus one, or a location
>     wrapper for such a constant.  */
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 28e8e71b036..0237826dd23 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -5205,6 +5205,12 @@ extern bool integer_each_onep (const_tree);
>
>  extern bool integer_all_onesp (const_tree);
>
> +/* integer_half_truncated_all_ones_p (tree x) will return true if x is
> +   the integer constant that the half truncated bits are all 1.
> +   For example, uint16_t type with 255 constant integer will be true.  */
> +
> +extern bool integer_half_truncated_all_ones_p (const_tree expr);
> +
>  /* integer_minus_onep (tree x) is nonzero if X is an integer constant of
>     value -1.  */
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
  2024-06-26  1:45 [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int pan2.li
  2024-06-26 13:51 ` Richard Biener
@ 2024-06-27  5:12 ` pan2.li
  2024-06-27  6:07   ` Richard Biener
  1 sibling, 1 reply; 6+ messages in thread
From: pan2.li @ 2024-06-27  5:12 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, richard.guenther, jeffreyalaw,
	rdapp.gcc, Pan Li

From: Pan Li <pan2.li@intel.com>

This patch adds the middle-end representation for saturating
truncation, i.e. the truncated result is set to the max value of the
narrow type when the input overflows it.  It matches patterns similar
to the one below.

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
  NT __attribute__((noinline))         \
  sat_u_truc_##T##_fmt_1 (WT x)        \
  {                                    \
    bool overflow = x > (WT)(NT)(-1);  \
    return ((NT)x) | (NT)-overflow;    \
  }

For example, truncated uint16_t to uint8_t, we have

* SAT_TRUNC (254)   => 254
* SAT_TRUNC (255)   => 255
* SAT_TRUNC (256)   => 255
* SAT_TRUNC (65535) => 255

Given below SAT_TRUNC from uint64_t to uint32_t.

DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)

Before this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  _Bool overflow;
  unsigned int _1;
  unsigned int _2;
  unsigned int _3;
  uint32_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  overflow_5 = x_4(D) > 4294967295;
  _1 = (unsigned int) x_4(D);
  _2 = (unsigned int) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;    succ:       EXIT

}

After this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  uint32_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;    succ:       EXIT

}

The following tests passed with this patch:
*. The rv64gcv full regression tests.
*. The rv64gcv build with glibc.
*. The x86 bootstrap tests.
*. The x86 full regression tests.

gcc/ChangeLog:

	* internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
	unary_convert.
	* match.pd: Add new matching pattern for unsigned int sat_trunc.
	* optabs.def (OPTAB_CL): Add unsigned and signed optab.
	* tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add
	new decl for the matching pattern generated func.
	(match_unsigned_saturation_trunc): Add new func impl to match
	the .SAT_TRUNC.
	(math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
	function under BIT_IOR_EXPR case.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/internal-fn.def       |  2 ++
 gcc/match.pd              | 16 ++++++++++++++++
 gcc/optabs.def            |  3 +++
 gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 53 insertions(+)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a8c83437ada..915d329c05a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 3d0689c9312..06120a1c62c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3210,6 +3210,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
+   SAT_U_TRUNC = (NT)x | (NT)(-(x > (WT)(NT)(-1))).  */
+(match (unsigned_integer_sat_trunc @0)
+ (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
+   (convert @0))
+ (with {
+   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+   unsigned otype_precision = TYPE_PRECISION (type);
+   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
+   wide_int int_cst = wi::to_wide (@1, itype_precision);
+  }
+  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+       && TYPE_UNSIGNED (TREE_TYPE (@0))
+       && otype_precision < itype_precision
+       && wi::eq_p (trunc_max, int_cst)))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index bc2611abdc2..c16580ce956 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
 OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
 OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
 
+OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", NULL)
+OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", NULL)
+
 OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
 OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
 
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 57085488722..3783a874699 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
@@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
 					ops[0], ops[1]);
 }
 
+/*
+ * Try to match unsigned saturation truncate.
+ * uint16_t x_4(D);
+ * uint8_t _6;
+ * overflow_5 = x_4(D) > 255;
+ * _1 = (unsigned char) x_4(D);
+ * _2 = (unsigned char) overflow_5;
+ * _3 = -_2;
+ * _6 = _1 | _3;
+ * =>
+ * _6 = .SAT_TRUNC (x_4(D));
+ * */
+static void
+match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+  tree ops[1];
+  tree lhs = gimple_assign_lhs (stmt);
+  tree type = TREE_TYPE (lhs);
+
+  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
+    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
+				       tree_pair (type, TREE_TYPE (ops[0])),
+				       OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, /* update_eh_info */ true);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 
 	    case BIT_IOR_EXPR:
 	      match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
+	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
  2024-06-27  5:12 ` [PATCH v2] " pan2.li
@ 2024-06-27  6:07   ` Richard Biener
  2024-06-27  6:14     ` Li, Pan2
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2024-06-27  6:07 UTC (permalink / raw)
  To: pan2.li; +Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc

On Thu, Jun 27, 2024 at 7:12 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation truncation.  Aka set the result of truncated value to
> the max value when overflow.  It will take the pattern similar
> as below.
>
> Form 1:
>   #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
>   NT __attribute__((noinline))         \
>   sat_u_truc_##T##_fmt_1 (WT x)        \
>   {                                    \
>     bool overflow = x > (WT)(NT)(-1);  \
>     return ((NT)x) | (NT)-overflow;    \
>   }
>
> For example, when truncating uint16_t to uint8_t, we have
>
> * SAT_TRUNC (254)   => 254
> * SAT_TRUNC (255)   => 255
> * SAT_TRUNC (256)   => 255
> * SAT_TRUNC (65535) => 255
>
> Given below SAT_TRUNC from uint64_t to uint32_t.
>
> DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)
>
> Before this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   _Bool overflow;
>   unsigned int _1;
>   unsigned int _2;
>   unsigned int _3;
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   overflow_5 = x_4(D) > 4294967295;
>   _1 = (unsigned int) x_4(D);
>   _2 = (unsigned int) overflow_5;
>   _3 = -_2;
>   _6 = _1 | _3;
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
>
> }

OK.

Thanks,
Richard.

> The following tests passed for this patch:
> *. The rv64gcv full regression tests.
> *. The rv64gcv build with glibc.
> *. The x86 bootstrap tests.
> *. The x86 full regression tests.
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
>         unary_convert.
>         * match.pd: Add new matching pattern for unsigned int sat_trunc.
>         * optabs.def (OPTAB_CL): Add unsigned and signed optab.
>         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_trunc): Add
>         new decl for the matching pattern generated func.
>         (match_unsigned_saturation_trunc): Add new func impl to match
>         the .SAT_TRUNC.
>         (math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
>         function under BIT_IOR_EXPR case.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 16 ++++++++++++++++
>  gcc/optabs.def            |  3 +++
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
>  4 files changed, 53 insertions(+)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..915d329c05a 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..06120a1c62c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3210,6 +3210,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
> +   SAT_U_TRUNC = (NT)x | (NT)(-(x > (WT)(NT)(-1))).  */
> +(match (unsigned_integer_sat_trunc @0)
> + (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
> +   (convert @0))
> + (with {
> +   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> +   unsigned otype_precision = TYPE_PRECISION (type);
> +   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
> +   wide_int int_cst = wi::to_wide (@1, itype_precision);
> +  }
> +  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +       && TYPE_UNSIGNED (TREE_TYPE (@0))
> +       && otype_precision < itype_precision
> +       && wi::eq_p (trunc_max, int_cst)))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..c16580ce956 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
>  OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
>  OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
>
> +OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", NULL)
> +OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", NULL)
> +
>  OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
>  OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
>
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 57085488722..3783a874699 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> @@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
>                                         ops[0], ops[1]);
>  }
>
> +/*
> + * Try to match unsigned saturation truncate.
> + * uint16_t x_4(D);
> + * uint8_t _6;
> + * overflow_5 = x_4(D) > 255;
> + * _1 = (unsigned char) x_4(D);
> + * _2 = (unsigned char) overflow_5;
> + * _3 = -_2;
> + * _6 = _1 | _3;
> + * =>
> + * _6 = .SAT_TRUNC (x_4(D));
> + * */
> +static void
> +match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  tree ops[1];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree type = TREE_TYPE (lhs);
> +
> +  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
> +    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> +                                      tree_pair (type, TREE_TYPE (ops[0])),
> +                                      OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>             case BIT_IOR_EXPR:
>               match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH v2] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
  2024-06-27  6:07   ` Richard Biener
@ 2024-06-27  6:14     ` Li, Pan2
  0 siblings, 0 replies; 6+ messages in thread
From: Li, Pan2 @ 2024-06-27  6:14 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc

> OK.

Committed, thanks Richard.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Thursday, June 27, 2024 2:08 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int

On Thu, Jun 27, 2024 at 7:12 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation truncation.  Aka set the result of truncated value to
> the max value when overflow.  It will take the pattern similar
> as below.
>
> Form 1:
>   #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
>   NT __attribute__((noinline))         \
>   sat_u_truc_##T##_fmt_1 (WT x)        \
>   {                                    \
>     bool overflow = x > (WT)(NT)(-1);  \
>     return ((NT)x) | (NT)-overflow;    \
>   }
>
> For example, when truncating uint16_t to uint8_t, we have
>
> * SAT_TRUNC (254)   => 254
> * SAT_TRUNC (255)   => 255
> * SAT_TRUNC (256)   => 255
> * SAT_TRUNC (65535) => 255
>
> Given below SAT_TRUNC from uint64_t to uint32_t.
>
> DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)
>
> Before this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   _Bool overflow;
>   unsigned int _1;
>   unsigned int _2;
>   unsigned int _3;
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   overflow_5 = x_4(D) > 4294967295;
>   _1 = (unsigned int) x_4(D);
>   _2 = (unsigned int) overflow_5;
>   _3 = -_2;
>   _6 = _1 | _3;
>   return _6;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> __attribute__((noinline))
> uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
> {
>   uint32_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
>
> }

OK.

Thanks,
Richard.

> The following tests passed for this patch:
> *. The rv64gcv full regression tests.
> *. The rv64gcv build with glibc.
> *. The x86 bootstrap tests.
> *. The x86 full regression tests.
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
>         unary_convert.
>         * match.pd: Add new matching pattern for unsigned int sat_trunc.
>         * optabs.def (OPTAB_CL): Add unsigned and signed optab.
>         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_trunc): Add
>         new decl for the matching pattern generated func.
>         (match_unsigned_saturation_trunc): Add new func impl to match
>         the .SAT_TRUNC.
>         (math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
>         function under BIT_IOR_EXPR case.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 16 ++++++++++++++++
>  gcc/optabs.def            |  3 +++
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++++++++++
>  4 files changed, 53 insertions(+)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..915d329c05a 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..06120a1c62c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3210,6 +3210,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
> +   SAT_U_TRUNC = (NT)x | (NT)(-(x > (WT)(NT)(-1))).  */
> +(match (unsigned_integer_sat_trunc @0)
> + (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
> +   (convert @0))
> + (with {
> +   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> +   unsigned otype_precision = TYPE_PRECISION (type);
> +   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
> +   wide_int int_cst = wi::to_wide (@1, itype_precision);
> +  }
> +  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +       && TYPE_UNSIGNED (TREE_TYPE (@0))
> +       && otype_precision < itype_precision
> +       && wi::eq_p (trunc_max, int_cst)))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..c16580ce956 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
>  OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
>  OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
>
> +OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", NULL)
> +OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", NULL)
> +
>  OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
>  OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
>
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 57085488722..3783a874699 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4088,6 +4088,7 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> @@ -4216,6 +4217,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
>                                         ops[0], ops[1]);
>  }
>
> +/*
> + * Try to match unsigned saturation truncate.
> + * uint16_t x_4(D);
> + * uint8_t _6;
> + * overflow_5 = x_4(D) > 255;
> + * _1 = (unsigned char) x_4(D);
> + * _2 = (unsigned char) overflow_5;
> + * _3 = -_2;
> + * _6 = _1 | _3;
> + * =>
> + * _6 = .SAT_TRUNC (x_4(D));
> + * */
> +static void
> +match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  tree ops[1];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree type = TREE_TYPE (lhs);
> +
> +  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
> +    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> +                                      tree_pair (type, TREE_TYPE (ops[0])),
> +                                      OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6188,6 +6219,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>             case BIT_IOR_EXPR:
>               match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-06-27  6:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-26  1:45 [PATCH v1] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int pan2.li
2024-06-26 13:51 ` Richard Biener
2024-06-26 14:18   ` Li, Pan2
2024-06-27  5:12 ` [PATCH v2] " pan2.li
2024-06-27  6:07   ` Richard Biener
2024-06-27  6:14     ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).