* [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
@ 2024-06-12 12:37 pan2.li
2024-06-14 8:05 ` Richard Biener
0 siblings, 1 reply; 9+ messages in thread
From: pan2.li @ 2024-06-12 12:37 UTC (permalink / raw)
To: gcc-patches
Cc: juzhe.zhong, kito.cheng, richard.guenther, jeffreyalaw,
rdapp.gcc, Pan Li
From: Pan Li <pan2.li@intel.com>
After we support the scalar unsigned form 1 and 2, we would like
to introduce more forms include the branch and branchless. There
are forms 3-10 list as below:
Form 3:
#define SAT_SUB_U_3(T) \
T sat_sub_u_3_##T (T x, T y) \
{ \
return x > y ? x - y : 0; \
}
Form 4:
#define SAT_SUB_U_4(T) \
T sat_sub_u_4_##T (T x, T y) \
{ \
return x >= y ? x - y : 0; \
}
Form 5:
#define SAT_SUB_U_5(T) \
T sat_sub_u_5_##T (T x, T y) \
{ \
return x < y ? 0 : x - y; \
}
Form 6:
#define SAT_SUB_U_6(T) \
T sat_sub_u_6_##T (T x, T y) \
{ \
return x <= y ? 0 : x - y; \
}
Form 7:
#define SAT_SUB_U_7(T) \
T sat_sub_u_7_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return ret & (T)(overflow - 1); \
}
Form 8:
#define SAT_SUB_U_8(T) \
T sat_sub_u_8_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return ret & (T)-(!overflow); \
}
Form 9:
#define SAT_SUB_U_9(T) \
T sat_sub_u_9_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return overflow ? 0 : ret; \
}
Form 10:
#define SAT_SUB_U_10(T) \
T sat_sub_u_10_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return !overflow ? ret : 0; \
}
Take form 10 as example:
SAT_SUB_U_10(uint64_t);
Before this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
unsigned char _1;
unsigned char _2;
uint8_t _3;
__complex__ unsigned char _6;
;; basic block 2, loop depth 0
;; pred: ENTRY
_6 = .SUB_OVERFLOW (x_4(D), y_5(D));
_2 = IMAGPART_EXPR <_6>;
if (_2 == 0)
goto <bb 3>; [50.00%]
else
goto <bb 4>; [50.00%]
;; succ: 3
;; 4
;; basic block 3, loop depth 0
;; pred: 2
_1 = REALPART_EXPR <_6>;
;; succ: 4
;; basic block 4, loop depth 0
;; pred: 2
;; 3
# _3 = PHI <0(2), _1(3)>
return _3;
;; succ: EXIT
}
After this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
uint8_t _3;
;; basic block 2, loop depth 0
;; pred: ENTRY
_3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
return _3;
;; succ: EXIT
}
The below test suites are passed for this patch:
1. The rv64gcv fully regression test with newlib.
2. The rv64gcv build with glibc.
3. The x86 bootstrap test.
4. The x86 fully regression test.
gcc/ChangeLog:
* match.pd: Add more match for unsigned sat_sub.
* tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
func impl to match phi node for .SAT_SUB.
(math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/match.pd | 25 +++++++++++++++++++++++--
gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 5cfe81e80b3..66e411b3359 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0 */
(match (unsigned_integer_sat_sub @0 @1)
- (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
/* Unsigned saturation sub, case 2 (branch with ge):
SAT_U_SUB = X >= Y ? X - Y : 0. */
(match (unsigned_integer_sat_sub @0 @1)
- (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
@@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
+(match (unsigned_integer_sat_sub @0 @1)
+ (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+ (plus:c (imagpart @2) integer_minus_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+ (bit_xor:c (imagpart @2) integer_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
+ (realpart @2) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index fbb8e0ea306..05aa157611b 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
}
+/*
+ * Try to match saturation unsigned sub.
+ * <bb 2> [local count: 1073741824]:
+ * if (x_2(D) > y_3(D))
+ * goto <bb 3>; [50.00%]
+ * else
+ * goto <bb 4>; [50.00%]
+ *
+ * <bb 3> [local count: 536870912]:
+ * _4 = x_2(D) - y_3(D);
+ *
+ * <bb 4> [local count: 1073741824]:
+ * # _1 = PHI <0(2), _4(3)>
+ * =>
+ * <bb 4> [local count: 1073741824]:
+ * _1 = .SAT_SUB (x_2(D), y_3(D)); */
+static void
+match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
+{
+ if (gimple_phi_num_args (phi) != 2)
+ return;
+
+ tree ops[2];
+ tree phi_result = gimple_phi_result (phi);
+
+ if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
+ build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
+ ops[0], ops[1]);
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
{
gimple_stmt_iterator gsi = gsi_start_bb (bb);
match_unsigned_saturation_add (&gsi, psi.phi ());
+ match_unsigned_saturation_sub (&gsi, psi.phi ());
}
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
@@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
continue;
}
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
+ match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
break;
case PLUS_EXPR:
@@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
break;
case COND_EXPR:
+ case BIT_AND_EXPR:
match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
break;
--
2.34.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-12 12:37 [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB pan2.li
@ 2024-06-14 8:05 ` Richard Biener
2024-06-14 8:14 ` Li, Pan2
2024-06-19 7:36 ` Li, Pan2
0 siblings, 2 replies; 9+ messages in thread
From: Richard Biener @ 2024-06-14 8:05 UTC (permalink / raw)
To: pan2.li; +Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support the scalar unsigned form 1 and 2, we would like
> to introduce more forms include the branch and branchless. There
> are forms 3-10 list as below:
>
> Form 3:
> #define SAT_SUB_U_3(T) \
> T sat_sub_u_3_##T (T x, T y) \
> { \
> return x > y ? x - y : 0; \
> }
>
> Form 4:
> #define SAT_SUB_U_4(T) \
> T sat_sub_u_4_##T (T x, T y) \
> { \
> return x >= y ? x - y : 0; \
> }
>
> Form 5:
> #define SAT_SUB_U_5(T) \
> T sat_sub_u_5_##T (T x, T y) \
> { \
> return x < y ? 0 : x - y; \
> }
>
> Form 6:
> #define SAT_SUB_U_6(T) \
> T sat_sub_u_6_##T (T x, T y) \
> { \
> return x <= y ? 0 : x - y; \
> }
>
> Form 7:
> #define SAT_SUB_U_7(T) \
> T sat_sub_u_7_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)(overflow - 1); \
> }
>
> Form 8:
> #define SAT_SUB_U_8(T) \
> T sat_sub_u_8_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)-(!overflow); \
> }
>
> Form 9:
> #define SAT_SUB_U_9(T) \
> T sat_sub_u_9_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return overflow ? 0 : ret; \
> }
>
> Form 10:
> #define SAT_SUB_U_10(T) \
> T sat_sub_u_10_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return !overflow ? ret : 0; \
> }
>
> Take form 10 as example:
>
> SAT_SUB_U_10(uint64_t);
>
> Before this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> unsigned char _1;
> unsigned char _2;
> uint8_t _3;
> __complex__ unsigned char _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 == 0)
> goto <bb 3>; [50.00%]
> else
> goto <bb 4>; [50.00%]
> ;; succ: 3
> ;; 4
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 2
> ;; 3
> # _3 = PHI <0(2), _1(3)>
> return _3;
> ;; succ: EXIT
>
> }
>
> After this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> uint8_t _3;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> return _3;
> ;; succ: EXIT
>
> }
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test with newlib.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap test.
> 4. The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add more match for unsigned sat_sub.
> * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> func impl to match phi node for .SAT_SUB.
> (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 25 +++++++++++++++++++++++--
> gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cfe81e80b3..66e411b3359 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* Unsigned saturation sub, case 1 (branch with gt):
> SAT_U_SUB = X > Y ? X - Y : 0 */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> /* Unsigned saturation sub, case 2 (branch with ge):
> SAT_U_SUB = X >= Y ? X - Y : 0. */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (plus:c (imagpart @2) integer_minus_onep))
:c shouldn't be necessary on the plus
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (bit_xor:c (imagpart @2) integer_onep))
or on the bit_xor
OK with those changes.
Richard.
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> + (realpart @2) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index fbb8e0ea306..05aa157611b 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * <bb 2> [local count: 1073741824]:
> + * if (x_2(D) > y_3(D))
> + * goto <bb 3>; [50.00%]
> + * else
> + * goto <bb 4>; [50.00%]
> + *
> + * <bb 3> [local count: 536870912]:
> + * _4 = x_2(D) - y_3(D);
> + *
> + * <bb 4> [local count: 1073741824]:
> + * # _1 = PHI <0(2), _4(3)>
> + * =>
> + * <bb 4> [local count: 1073741824]:
> + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> + ops[0], ops[1]);
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> {
> gimple_stmt_iterator gsi = gsi_start_bb (bb);
> match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> continue;
> }
> match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> case PLUS_EXPR:
> @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case COND_EXPR:
> + case BIT_AND_EXPR:
> match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-14 8:05 ` Richard Biener
@ 2024-06-14 8:14 ` Li, Pan2
2024-06-14 14:06 ` Li, Pan2
2024-06-19 7:36 ` Li, Pan2
1 sibling, 1 reply; 9+ messages in thread
From: Li, Pan2 @ 2024-06-14 8:14 UTC (permalink / raw)
To: Richard Biener
Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
Thanks Richard for comments.
> :c shouldn't be necessary on the plus
> or on the bit_xor
> OK with those changes.
Will remove the :c and commit it if there is no surprise from test suites.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Friday, June 14, 2024 4:05 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support the scalar unsigned form 1 and 2, we would like
> to introduce more forms include the branch and branchless. There
> are forms 3-10 list as below:
>
> Form 3:
> #define SAT_SUB_U_3(T) \
> T sat_sub_u_3_##T (T x, T y) \
> { \
> return x > y ? x - y : 0; \
> }
>
> Form 4:
> #define SAT_SUB_U_4(T) \
> T sat_sub_u_4_##T (T x, T y) \
> { \
> return x >= y ? x - y : 0; \
> }
>
> Form 5:
> #define SAT_SUB_U_5(T) \
> T sat_sub_u_5_##T (T x, T y) \
> { \
> return x < y ? 0 : x - y; \
> }
>
> Form 6:
> #define SAT_SUB_U_6(T) \
> T sat_sub_u_6_##T (T x, T y) \
> { \
> return x <= y ? 0 : x - y; \
> }
>
> Form 7:
> #define SAT_SUB_U_7(T) \
> T sat_sub_u_7_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)(overflow - 1); \
> }
>
> Form 8:
> #define SAT_SUB_U_8(T) \
> T sat_sub_u_8_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)-(!overflow); \
> }
>
> Form 9:
> #define SAT_SUB_U_9(T) \
> T sat_sub_u_9_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return overflow ? 0 : ret; \
> }
>
> Form 10:
> #define SAT_SUB_U_10(T) \
> T sat_sub_u_10_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return !overflow ? ret : 0; \
> }
>
> Take form 10 as example:
>
> SAT_SUB_U_10(uint64_t);
>
> Before this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> unsigned char _1;
> unsigned char _2;
> uint8_t _3;
> __complex__ unsigned char _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 == 0)
> goto <bb 3>; [50.00%]
> else
> goto <bb 4>; [50.00%]
> ;; succ: 3
> ;; 4
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 2
> ;; 3
> # _3 = PHI <0(2), _1(3)>
> return _3;
> ;; succ: EXIT
>
> }
>
> After this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> uint8_t _3;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> return _3;
> ;; succ: EXIT
>
> }
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test with newlib.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap test.
> 4. The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add more match for unsigned sat_sub.
> * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> func impl to match phi node for .SAT_SUB.
> (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 25 +++++++++++++++++++++++--
> gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cfe81e80b3..66e411b3359 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* Unsigned saturation sub, case 1 (branch with gt):
> SAT_U_SUB = X > Y ? X - Y : 0 */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> /* Unsigned saturation sub, case 2 (branch with ge):
> SAT_U_SUB = X >= Y ? X - Y : 0. */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (plus:c (imagpart @2) integer_minus_onep))
:c shouldn't be necessary on the plus
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (bit_xor:c (imagpart @2) integer_onep))
or on the bit_xor
OK with those changes.
Richard.
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> + (realpart @2) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index fbb8e0ea306..05aa157611b 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * <bb 2> [local count: 1073741824]:
> + * if (x_2(D) > y_3(D))
> + * goto <bb 3>; [50.00%]
> + * else
> + * goto <bb 4>; [50.00%]
> + *
> + * <bb 3> [local count: 536870912]:
> + * _4 = x_2(D) - y_3(D);
> + *
> + * <bb 4> [local count: 1073741824]:
> + * # _1 = PHI <0(2), _4(3)>
> + * =>
> + * <bb 4> [local count: 1073741824]:
> + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> + ops[0], ops[1]);
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> {
> gimple_stmt_iterator gsi = gsi_start_bb (bb);
> match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> continue;
> }
> match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> case PLUS_EXPR:
> @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case COND_EXPR:
> + case BIT_AND_EXPR:
> match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-14 8:14 ` Li, Pan2
@ 2024-06-14 14:06 ` Li, Pan2
0 siblings, 0 replies; 9+ messages in thread
From: Li, Pan2 @ 2024-06-14 14:06 UTC (permalink / raw)
To: Richard Biener
Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
Committed with those changes and test suites passed.
Pan
-----Original Message-----
From: Li, Pan2
Sent: Friday, June 14, 2024 4:15 PM
To: Richard Biener <richard.guenther@gmail.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
Thanks Richard for comments.
> :c shouldn't be necessary on the plus
> or on the bit_xor
> OK with those changes.
Will remove the :c and commit it if there is no surprise from test suites.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Friday, June 14, 2024 4:05 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support the scalar unsigned form 1 and 2, we would like
> to introduce more forms include the branch and branchless. There
> are forms 3-10 list as below:
>
> Form 3:
> #define SAT_SUB_U_3(T) \
> T sat_sub_u_3_##T (T x, T y) \
> { \
> return x > y ? x - y : 0; \
> }
>
> Form 4:
> #define SAT_SUB_U_4(T) \
> T sat_sub_u_4_##T (T x, T y) \
> { \
> return x >= y ? x - y : 0; \
> }
>
> Form 5:
> #define SAT_SUB_U_5(T) \
> T sat_sub_u_5_##T (T x, T y) \
> { \
> return x < y ? 0 : x - y; \
> }
>
> Form 6:
> #define SAT_SUB_U_6(T) \
> T sat_sub_u_6_##T (T x, T y) \
> { \
> return x <= y ? 0 : x - y; \
> }
>
> Form 7:
> #define SAT_SUB_U_7(T) \
> T sat_sub_u_7_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)(overflow - 1); \
> }
>
> Form 8:
> #define SAT_SUB_U_8(T) \
> T sat_sub_u_8_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)-(!overflow); \
> }
>
> Form 9:
> #define SAT_SUB_U_9(T) \
> T sat_sub_u_9_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return overflow ? 0 : ret; \
> }
>
> Form 10:
> #define SAT_SUB_U_10(T) \
> T sat_sub_u_10_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return !overflow ? ret : 0; \
> }
>
> Take form 10 as example:
>
> SAT_SUB_U_10(uint64_t);
>
> Before this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> unsigned char _1;
> unsigned char _2;
> uint8_t _3;
> __complex__ unsigned char _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 == 0)
> goto <bb 3>; [50.00%]
> else
> goto <bb 4>; [50.00%]
> ;; succ: 3
> ;; 4
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 2
> ;; 3
> # _3 = PHI <0(2), _1(3)>
> return _3;
> ;; succ: EXIT
>
> }
>
> After this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> uint8_t _3;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> return _3;
> ;; succ: EXIT
>
> }
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test with newlib.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap test.
> 4. The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add more match for unsigned sat_sub.
> * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> func impl to match phi node for .SAT_SUB.
> (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 25 +++++++++++++++++++++++--
> gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cfe81e80b3..66e411b3359 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* Unsigned saturation sub, case 1 (branch with gt):
> SAT_U_SUB = X > Y ? X - Y : 0 */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> /* Unsigned saturation sub, case 2 (branch with ge):
> SAT_U_SUB = X >= Y ? X - Y : 0. */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (plus:c (imagpart @2) integer_minus_onep))
:c shouldn't be necessary on the plus
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (bit_xor:c (imagpart @2) integer_onep))
or on the bit_xor
OK with those changes.
Richard.
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> + (realpart @2) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index fbb8e0ea306..05aa157611b 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * <bb 2> [local count: 1073741824]:
> + * if (x_2(D) > y_3(D))
> + * goto <bb 3>; [50.00%]
> + * else
> + * goto <bb 4>; [50.00%]
> + *
> + * <bb 3> [local count: 536870912]:
> + * _4 = x_2(D) - y_3(D);
> + *
> + * <bb 4> [local count: 1073741824]:
> + * # _1 = PHI <0(2), _4(3)>
> + * =>
> + * <bb 4> [local count: 1073741824]:
> + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> + ops[0], ops[1]);
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> {
> gimple_stmt_iterator gsi = gsi_start_bb (bb);
> match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> continue;
> }
> match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> case PLUS_EXPR:
> @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case COND_EXPR:
> + case BIT_AND_EXPR:
> match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-14 8:05 ` Richard Biener
2024-06-14 8:14 ` Li, Pan2
@ 2024-06-19 7:36 ` Li, Pan2
2024-06-19 8:00 ` Richard Biener
2024-06-27 7:31 ` Andrew Pinski
1 sibling, 2 replies; 9+ messages in thread
From: Li, Pan2 @ 2024-06-19 7:36 UTC (permalink / raw)
To: Richard Biener
Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
Hi Richard,
Given almost all unsigned SAT_ADD/SAT_SUB patches are merged, I revisit the original code pattern aka zip benchmark.
It may look like below:
void test (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
{
unsigned m = 0, n = count;
register uint16_t *p;
p = x;
do {
m = *--p;
*p = (uint16_t)(m >= wsize ? m-wsize : 0); // There will be a conversion here.
} while (--n);
}
And we can have 179 tree pass as below:
<bb 3> [local count: 1073741824]:
# n_3 = PHI <n_15(7), count_7(D)(15)>
# p_4 = PHI <p_10(7), x_8(D)(15)>
p_10 = p_4 + 18446744073709551614;
_1 = *p_10;
m_11 = (unsigned int) _1;
_2 = m_11 - wsize_12(D);
iftmp.0_13 = (short unsigned int) _2;
_18 = m_11 >= wsize_12(D);
iftmp.0_5 = _18 ? iftmp.0_13 : 0;
*p_10 = iftmp.0_5;
The above form doesn't hit any form we have supported in match.pd. Then I have one idea that to convert
uint16 d, tmp;
uint32 a, b, m;
m = a - b;
tmp = (uint16)m;
d = a >= b ? tmp : 0;
to
d = (uint16)(.SAT_SUB (a, b));
I am not very sure it is reasonable to make it work, it may have gimple assignment with convert similar as below (may require the help of vectorize_conversion?).
Would like to get some hint from you before the next step, thanks a lot.
patt_34 = .SAT_SUB (m_11, wsize_12(D));
patt_35 = (vector([8,8]) short unsigned int) patt_34;
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Friday, June 14, 2024 4:05 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support the scalar unsigned form 1 and 2, we would like
> to introduce more forms include the branch and branchless. There
> are forms 3-10 list as below:
>
> Form 3:
> #define SAT_SUB_U_3(T) \
> T sat_sub_u_3_##T (T x, T y) \
> { \
> return x > y ? x - y : 0; \
> }
>
> Form 4:
> #define SAT_SUB_U_4(T) \
> T sat_sub_u_4_##T (T x, T y) \
> { \
> return x >= y ? x - y : 0; \
> }
>
> Form 5:
> #define SAT_SUB_U_5(T) \
> T sat_sub_u_5_##T (T x, T y) \
> { \
> return x < y ? 0 : x - y; \
> }
>
> Form 6:
> #define SAT_SUB_U_6(T) \
> T sat_sub_u_6_##T (T x, T y) \
> { \
> return x <= y ? 0 : x - y; \
> }
>
> Form 7:
> #define SAT_SUB_U_7(T) \
> T sat_sub_u_7_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)(overflow - 1); \
> }
>
> Form 8:
> #define SAT_SUB_U_8(T) \
> T sat_sub_u_8_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return ret & (T)-(!overflow); \
> }
>
> Form 9:
> #define SAT_SUB_U_9(T) \
> T sat_sub_u_9_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return overflow ? 0 : ret; \
> }
>
> Form 10:
> #define SAT_SUB_U_10(T) \
> T sat_sub_u_10_##T (T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_sub_overflow (x, y, &ret); \
> return !overflow ? ret : 0; \
> }
>
> Take form 10 as example:
>
> SAT_SUB_U_10(uint64_t);
>
> Before this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> unsigned char _1;
> unsigned char _2;
> uint8_t _3;
> __complex__ unsigned char _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 == 0)
> goto <bb 3>; [50.00%]
> else
> goto <bb 4>; [50.00%]
> ;; succ: 3
> ;; 4
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 2
> ;; 3
> # _3 = PHI <0(2), _1(3)>
> return _3;
> ;; succ: EXIT
>
> }
>
> After this patch:
> uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> {
> uint8_t _3;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> return _3;
> ;; succ: EXIT
>
> }
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test with newlib.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap test.
> 4. The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add more match for unsigned sat_sub.
> * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> func impl to match phi node for .SAT_SUB.
> (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 25 +++++++++++++++++++++++--
> gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cfe81e80b3..66e411b3359 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* Unsigned saturation sub, case 1 (branch with gt):
> SAT_U_SUB = X > Y ? X - Y : 0 */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> /* Unsigned saturation sub, case 2 (branch with ge):
> SAT_U_SUB = X >= Y ? X - Y : 0. */
> (match (unsigned_integer_sat_sub @0 @1)
> - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (plus:c (imagpart @2) integer_minus_onep))
:c shouldn't be necessary on the plus
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> + (bit_xor:c (imagpart @2) integer_onep))
or on the bit_xor
OK with those changes.
Richard.
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> + (realpart @2) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index fbb8e0ea306..05aa157611b 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> }
>
> +/*
> + * Try to match saturation unsigned sub.
> + * <bb 2> [local count: 1073741824]:
> + * if (x_2(D) > y_3(D))
> + * goto <bb 3>; [50.00%]
> + * else
> + * goto <bb 4>; [50.00%]
> + *
> + * <bb 3> [local count: 536870912]:
> + * _4 = x_2(D) - y_3(D);
> + *
> + * <bb 4> [local count: 1073741824]:
> + * # _1 = PHI <0(2), _4(3)>
> + * =>
> + * <bb 4> [local count: 1073741824]:
> + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> + ops[0], ops[1]);
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> {
> gimple_stmt_iterator gsi = gsi_start_bb (bb);
> match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> continue;
> }
> match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> case PLUS_EXPR:
> @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case COND_EXPR:
> + case BIT_AND_EXPR:
> match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> break;
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-19 7:36 ` Li, Pan2
@ 2024-06-19 8:00 ` Richard Biener
2024-06-19 13:47 ` Li, Pan2
2024-06-27 7:31 ` Andrew Pinski
1 sibling, 1 reply; 9+ messages in thread
From: Richard Biener @ 2024-06-19 8:00 UTC (permalink / raw)
To: Li, Pan2; +Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
On Wed, Jun 19, 2024 at 9:37 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> Given almost all unsigned SAT_ADD/SAT_SUB patches are merged, I revisit the original code pattern aka zip benchmark.
> It may look like below:
>
> void test (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> {
> unsigned m = 0, n = count;
> register uint16_t *p;
>
> p = x;
>
> do {
> m = *--p;
>
> *p = (uint16_t)(m >= wsize ? m-wsize : 0); // There will be a conversion here.
> } while (--n);
> }
>
> And we can have 179 tree pass as below:
>
> <bb 3> [local count: 1073741824]:
> # n_3 = PHI <n_15(7), count_7(D)(15)>
> # p_4 = PHI <p_10(7), x_8(D)(15)>
> p_10 = p_4 + 18446744073709551614;
> _1 = *p_10;
> m_11 = (unsigned int) _1;
> _2 = m_11 - wsize_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = m_11 >= wsize_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> *p_10 = iftmp.0_5;
>
> The above form doesn't hit any form we have supported in match.pd. Then I have one idea that to convert
>
> uint16 d, tmp;
> uint32 a, b, m;
>
> m = a - b;
> tmp = (uint16)m;
> d = a >= b ? tmp : 0;
>
> to
>
> d = (uint16)(.SAT_SUB (a, b));
The key here is to turn this into
m = a - b;
tmp = a >= b ? m : 0;
d = (uint16) tmp;
I guess? We probably have the reverse transform, turn
(uint16) a ? b : c; into a ? (uint16)b : (uint16)c if any of the arm simplifies.
OTOH if you figure the correct rules for the allowed conversions adjusting the
pattern matching to allow a conversion on the subtract would work.
> I am not very sure it is reasonable to make it work, it may have gimple assignment with convert similar as below (may require the help of vectorize_conversion?).
> Would like to get some hint from you before the next step, thanks a lot.
>
> patt_34 = .SAT_SUB (m_11, wsize_12(D));
> patt_35 = (vector([8,8]) short unsigned int) patt_34;
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 14, 2024 4:05 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
>
> On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support the scalar unsigned form 1 and 2, we would like
> > to introduce more forms include the branch and branchless. There
> > are forms 3-10 list as below:
> >
> > Form 3:
> > #define SAT_SUB_U_3(T) \
> > T sat_sub_u_3_##T (T x, T y) \
> > { \
> > return x > y ? x - y : 0; \
> > }
> >
> > Form 4:
> > #define SAT_SUB_U_4(T) \
> > T sat_sub_u_4_##T (T x, T y) \
> > { \
> > return x >= y ? x - y : 0; \
> > }
> >
> > Form 5:
> > #define SAT_SUB_U_5(T) \
> > T sat_sub_u_5_##T (T x, T y) \
> > { \
> > return x < y ? 0 : x - y; \
> > }
> >
> > Form 6:
> > #define SAT_SUB_U_6(T) \
> > T sat_sub_u_6_##T (T x, T y) \
> > { \
> > return x <= y ? 0 : x - y; \
> > }
> >
> > Form 7:
> > #define SAT_SUB_U_7(T) \
> > T sat_sub_u_7_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)(overflow - 1); \
> > }
> >
> > Form 8:
> > #define SAT_SUB_U_8(T) \
> > T sat_sub_u_8_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)-(!overflow); \
> > }
> >
> > Form 9:
> > #define SAT_SUB_U_9(T) \
> > T sat_sub_u_9_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return overflow ? 0 : ret; \
> > }
> >
> > Form 10:
> > #define SAT_SUB_U_10(T) \
> > T sat_sub_u_10_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return !overflow ? ret : 0; \
> > }
> >
> > Take form 10 as example:
> >
> > SAT_SUB_U_10(uint64_t);
> >
> > Before this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > unsigned char _1;
> > unsigned char _2;
> > uint8_t _3;
> > __complex__ unsigned char _6;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> > _2 = IMAGPART_EXPR <_6>;
> > if (_2 == 0)
> > goto <bb 3>; [50.00%]
> > else
> > goto <bb 4>; [50.00%]
> > ;; succ: 3
> > ;; 4
> >
> > ;; basic block 3, loop depth 0
> > ;; pred: 2
> > _1 = REALPART_EXPR <_6>;
> > ;; succ: 4
> >
> > ;; basic block 4, loop depth 0
> > ;; pred: 2
> > ;; 3
> > # _3 = PHI <0(2), _1(3)>
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > After this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > uint8_t _3;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > The below test suites are passed for this patch:
> > 1. The rv64gcv fully regression test with newlib.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap test.
> > 4. The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Add more match for unsigned sat_sub.
> > * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> > func impl to match phi node for .SAT_SUB.
> > (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> > for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/match.pd | 25 +++++++++++++++++++++++--
> > gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5cfe81e80b3..66e411b3359 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > /* Unsigned saturation sub, case 1 (branch with gt):
> > SAT_U_SUB = X > Y ? X - Y : 0 */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> > SAT_U_SUB = X >= Y ? X - Y : 0. */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (plus:c (imagpart @2) integer_minus_onep))
>
> :c shouldn't be necessary on the plus
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (bit_xor:c (imagpart @2) integer_onep))
>
> or on the bit_xor
>
> OK with those changes.
>
> Richard.
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> > + (realpart @2) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > /* x > y && x != XXX_MIN --> x > y
> > x > y && x == XXX_MIN --> false . */
> > (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index fbb8e0ea306..05aa157611b 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> > }
> >
> > +/*
> > + * Try to match saturation unsigned sub.
> > + * <bb 2> [local count: 1073741824]:
> > + * if (x_2(D) > y_3(D))
> > + * goto <bb 3>; [50.00%]
> > + * else
> > + * goto <bb 4>; [50.00%]
> > + *
> > + * <bb 3> [local count: 536870912]:
> > + * _4 = x_2(D) - y_3(D);
> > + *
> > + * <bb 4> [local count: 1073741824]:
> > + * # _1 = PHI <0(2), _4(3)>
> > + * =>
> > + * <bb 4> [local count: 1073741824]:
> > + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > + if (gimple_phi_num_args (phi) != 2)
> > + return;
> > +
> > + tree ops[2];
> > + tree phi_result = gimple_phi_result (phi);
> > +
> > + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> > + ops[0], ops[1]);
> > +}
> > +
> > /* Recognize for unsigned x
> > x = y - z;
> > if (x > y)
> > @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > {
> > gimple_stmt_iterator gsi = gsi_start_bb (bb);
> > match_unsigned_saturation_add (&gsi, psi.phi ());
> > + match_unsigned_saturation_sub (&gsi, psi.phi ());
> > }
> >
> > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> > @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > continue;
> > }
> > match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > case PLUS_EXPR:
> > @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > break;
> >
> > case COND_EXPR:
> > + case BIT_AND_EXPR:
> > match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-19 8:00 ` Richard Biener
@ 2024-06-19 13:47 ` Li, Pan2
0 siblings, 0 replies; 9+ messages in thread
From: Li, Pan2 @ 2024-06-19 13:47 UTC (permalink / raw)
To: Richard Biener
Cc: gcc-patches, juzhe.zhong, kito.cheng, jeffreyalaw, rdapp.gcc
Got it. Thanks Richard for suggestion.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Wednesday, June 19, 2024 4:00 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
On Wed, Jun 19, 2024 at 9:37 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> Given almost all unsigned SAT_ADD/SAT_SUB patches are merged, I revisit the original code pattern aka zip benchmark.
> It may look like below:
>
> void test (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> {
> unsigned m = 0, n = count;
> register uint16_t *p;
>
> p = x;
>
> do {
> m = *--p;
>
> *p = (uint16_t)(m >= wsize ? m-wsize : 0); // There will be a conversion here.
> } while (--n);
> }
>
> And we can have 179 tree pass as below:
>
> <bb 3> [local count: 1073741824]:
> # n_3 = PHI <n_15(7), count_7(D)(15)>
> # p_4 = PHI <p_10(7), x_8(D)(15)>
> p_10 = p_4 + 18446744073709551614;
> _1 = *p_10;
> m_11 = (unsigned int) _1;
> _2 = m_11 - wsize_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = m_11 >= wsize_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> *p_10 = iftmp.0_5;
>
> The above form doesn't hit any form we have supported in match.pd. Then I have one idea that to convert
>
> uint16 d, tmp;
> uint32 a, b, m;
>
> m = a - b;
> tmp = (uint16)m;
> d = a >= b ? tmp : 0;
>
> to
>
> d = (uint16)(.SAT_SUB (a, b));
The key here is to turn this into
m = a - b;
tmp = a >= b ? m : 0;
d = (uint16) tmp;
I guess? We probably have the reverse transform, turn
(uint16) a ? b : c; into a ? (uint16)b : (uint16)c if any of the arm simplifies.
OTOH if you figure the correct rules for the allowed conversions adjusting the
pattern matching to allow a conversion on the subtract would work.
> I am not very sure it is reasonable to make it work, it may have gimple assignment with convert similar as below (may require the help of vectorize_conversion?).
> Would like to get some hint from you before the next step, thanks a lot.
>
> patt_34 = .SAT_SUB (m_11, wsize_12(D));
> patt_35 = (vector([8,8]) short unsigned int) patt_34;
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 14, 2024 4:05 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
>
> On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support the scalar unsigned form 1 and 2, we would like
> > to introduce more forms include the branch and branchless. There
> > are forms 3-10 list as below:
> >
> > Form 3:
> > #define SAT_SUB_U_3(T) \
> > T sat_sub_u_3_##T (T x, T y) \
> > { \
> > return x > y ? x - y : 0; \
> > }
> >
> > Form 4:
> > #define SAT_SUB_U_4(T) \
> > T sat_sub_u_4_##T (T x, T y) \
> > { \
> > return x >= y ? x - y : 0; \
> > }
> >
> > Form 5:
> > #define SAT_SUB_U_5(T) \
> > T sat_sub_u_5_##T (T x, T y) \
> > { \
> > return x < y ? 0 : x - y; \
> > }
> >
> > Form 6:
> > #define SAT_SUB_U_6(T) \
> > T sat_sub_u_6_##T (T x, T y) \
> > { \
> > return x <= y ? 0 : x - y; \
> > }
> >
> > Form 7:
> > #define SAT_SUB_U_7(T) \
> > T sat_sub_u_7_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)(overflow - 1); \
> > }
> >
> > Form 8:
> > #define SAT_SUB_U_8(T) \
> > T sat_sub_u_8_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)-(!overflow); \
> > }
> >
> > Form 9:
> > #define SAT_SUB_U_9(T) \
> > T sat_sub_u_9_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return overflow ? 0 : ret; \
> > }
> >
> > Form 10:
> > #define SAT_SUB_U_10(T) \
> > T sat_sub_u_10_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return !overflow ? ret : 0; \
> > }
> >
> > Take form 10 as example:
> >
> > SAT_SUB_U_10(uint64_t);
> >
> > Before this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > unsigned char _1;
> > unsigned char _2;
> > uint8_t _3;
> > __complex__ unsigned char _6;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> > _2 = IMAGPART_EXPR <_6>;
> > if (_2 == 0)
> > goto <bb 3>; [50.00%]
> > else
> > goto <bb 4>; [50.00%]
> > ;; succ: 3
> > ;; 4
> >
> > ;; basic block 3, loop depth 0
> > ;; pred: 2
> > _1 = REALPART_EXPR <_6>;
> > ;; succ: 4
> >
> > ;; basic block 4, loop depth 0
> > ;; pred: 2
> > ;; 3
> > # _3 = PHI <0(2), _1(3)>
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > After this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > uint8_t _3;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > The below test suites are passed for this patch:
> > 1. The rv64gcv fully regression test with newlib.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap test.
> > 4. The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Add more match for unsigned sat_sub.
> > * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> > func impl to match phi node for .SAT_SUB.
> > (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> > for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/match.pd | 25 +++++++++++++++++++++++--
> > gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5cfe81e80b3..66e411b3359 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > /* Unsigned saturation sub, case 1 (branch with gt):
> > SAT_U_SUB = X > Y ? X - Y : 0 */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> > SAT_U_SUB = X >= Y ? X - Y : 0. */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (plus:c (imagpart @2) integer_minus_onep))
>
> :c shouldn't be necessary on the plus
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (bit_xor:c (imagpart @2) integer_onep))
>
> or on the bit_xor
>
> OK with those changes.
>
> Richard.
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> > + (realpart @2) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > /* x > y && x != XXX_MIN --> x > y
> > x > y && x == XXX_MIN --> false . */
> > (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index fbb8e0ea306..05aa157611b 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> > }
> >
> > +/*
> > + * Try to match saturation unsigned sub.
> > + * <bb 2> [local count: 1073741824]:
> > + * if (x_2(D) > y_3(D))
> > + * goto <bb 3>; [50.00%]
> > + * else
> > + * goto <bb 4>; [50.00%]
> > + *
> > + * <bb 3> [local count: 536870912]:
> > + * _4 = x_2(D) - y_3(D);
> > + *
> > + * <bb 4> [local count: 1073741824]:
> > + * # _1 = PHI <0(2), _4(3)>
> > + * =>
> > + * <bb 4> [local count: 1073741824]:
> > + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > + if (gimple_phi_num_args (phi) != 2)
> > + return;
> > +
> > + tree ops[2];
> > + tree phi_result = gimple_phi_result (phi);
> > +
> > + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> > + ops[0], ops[1]);
> > +}
> > +
> > /* Recognize for unsigned x
> > x = y - z;
> > if (x > y)
> > @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > {
> > gimple_stmt_iterator gsi = gsi_start_bb (bb);
> > match_unsigned_saturation_add (&gsi, psi.phi ());
> > + match_unsigned_saturation_sub (&gsi, psi.phi ());
> > }
> >
> > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> > @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > continue;
> > }
> > match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > case PLUS_EXPR:
> > @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > break;
> >
> > case COND_EXPR:
> > + case BIT_AND_EXPR:
> > match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-19 7:36 ` Li, Pan2
2024-06-19 8:00 ` Richard Biener
@ 2024-06-27 7:31 ` Andrew Pinski
2024-06-27 7:54 ` Li, Pan2
1 sibling, 1 reply; 9+ messages in thread
From: Andrew Pinski @ 2024-06-27 7:31 UTC (permalink / raw)
To: Li, Pan2
Cc: Richard Biener, gcc-patches, juzhe.zhong, kito.cheng,
jeffreyalaw, rdapp.gcc
On Wed, Jun 19, 2024 at 12:37 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> Given almost all unsigned SAT_ADD/SAT_SUB patches are merged, I revisit the original code pattern aka zip benchmark.
> It may look like below:
>
> void test (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> {
> unsigned m = 0, n = count;
> register uint16_t *p;
>
> p = x;
>
> do {
> m = *--p;
>
> *p = (uint16_t)(m >= wsize ? m-wsize : 0); // There will be a conversion here.
> } while (--n);
> }
>
> And we can have 179 tree pass as below:
>
> <bb 3> [local count: 1073741824]:
> # n_3 = PHI <n_15(7), count_7(D)(15)>
> # p_4 = PHI <p_10(7), x_8(D)(15)>
> p_10 = p_4 + 18446744073709551614;
> _1 = *p_10;
> m_11 = (unsigned int) _1;
> _2 = m_11 - wsize_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = m_11 >= wsize_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> *p_10 = iftmp.0_5;
>
> The above form doesn't hit any form we have supported in match.pd. Then I have one idea that to convert
>
> uint16 d, tmp;
> uint32 a, b, m;
>
> m = a - b;
> tmp = (uint16)m;
> d = a >= b ? tmp : 0;
>
> to
>
> d = (uint16)(.SAT_SUB (a, b));
>
> I am not very sure it is reasonable to make it work, it may have gimple assignment with convert similar as below (may require the help of vectorize_conversion?).
> Would like to get some hint from you before the next step, thanks a lot.
>
> patt_34 = .SAT_SUB (m_11, wsize_12(D));
> patt_35 = (vector([8,8]) short unsigned int) patt_34;
I am not sure if this is related to the above but we also miss:
```
uint32_t saturation_add(uint32_t a, uint32_t b)
{
const uint64_t tmp = (uint64_t)a + b;
if (tmp > UINT32_MAX)
{
return UINT32_MAX;
}
return tmp;
}
```
This comes from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88603 . I
thought you might be interested in that form too.
Thanks,
Andrew
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 14, 2024 4:05 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
>
> On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support the scalar unsigned form 1 and 2, we would like
> > to introduce more forms include the branch and branchless. There
> > are forms 3-10 list as below:
> >
> > Form 3:
> > #define SAT_SUB_U_3(T) \
> > T sat_sub_u_3_##T (T x, T y) \
> > { \
> > return x > y ? x - y : 0; \
> > }
> >
> > Form 4:
> > #define SAT_SUB_U_4(T) \
> > T sat_sub_u_4_##T (T x, T y) \
> > { \
> > return x >= y ? x - y : 0; \
> > }
> >
> > Form 5:
> > #define SAT_SUB_U_5(T) \
> > T sat_sub_u_5_##T (T x, T y) \
> > { \
> > return x < y ? 0 : x - y; \
> > }
> >
> > Form 6:
> > #define SAT_SUB_U_6(T) \
> > T sat_sub_u_6_##T (T x, T y) \
> > { \
> > return x <= y ? 0 : x - y; \
> > }
> >
> > Form 7:
> > #define SAT_SUB_U_7(T) \
> > T sat_sub_u_7_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)(overflow - 1); \
> > }
> >
> > Form 8:
> > #define SAT_SUB_U_8(T) \
> > T sat_sub_u_8_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)-(!overflow); \
> > }
> >
> > Form 9:
> > #define SAT_SUB_U_9(T) \
> > T sat_sub_u_9_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return overflow ? 0 : ret; \
> > }
> >
> > Form 10:
> > #define SAT_SUB_U_10(T) \
> > T sat_sub_u_10_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return !overflow ? ret : 0; \
> > }
> >
> > Take form 10 as example:
> >
> > SAT_SUB_U_10(uint64_t);
> >
> > Before this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > unsigned char _1;
> > unsigned char _2;
> > uint8_t _3;
> > __complex__ unsigned char _6;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> > _2 = IMAGPART_EXPR <_6>;
> > if (_2 == 0)
> > goto <bb 3>; [50.00%]
> > else
> > goto <bb 4>; [50.00%]
> > ;; succ: 3
> > ;; 4
> >
> > ;; basic block 3, loop depth 0
> > ;; pred: 2
> > _1 = REALPART_EXPR <_6>;
> > ;; succ: 4
> >
> > ;; basic block 4, loop depth 0
> > ;; pred: 2
> > ;; 3
> > # _3 = PHI <0(2), _1(3)>
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > After this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > uint8_t _3;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > The below test suites are passed for this patch:
> > 1. The rv64gcv fully regression test with newlib.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap test.
> > 4. The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Add more match for unsigned sat_sub.
> > * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> > func impl to match phi node for .SAT_SUB.
> > (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> > for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/match.pd | 25 +++++++++++++++++++++++--
> > gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5cfe81e80b3..66e411b3359 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > /* Unsigned saturation sub, case 1 (branch with gt):
> > SAT_U_SUB = X > Y ? X - Y : 0 */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> > SAT_U_SUB = X >= Y ? X - Y : 0. */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (plus:c (imagpart @2) integer_minus_onep))
>
> :c shouldn't be necessary on the plus
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (bit_xor:c (imagpart @2) integer_onep))
>
> or on the bit_xor
>
> OK with those changes.
>
> Richard.
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> > + (realpart @2) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > /* x > y && x != XXX_MIN --> x > y
> > x > y && x == XXX_MIN --> false . */
> > (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index fbb8e0ea306..05aa157611b 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> > }
> >
> > +/*
> > + * Try to match saturation unsigned sub.
> > + * <bb 2> [local count: 1073741824]:
> > + * if (x_2(D) > y_3(D))
> > + * goto <bb 3>; [50.00%]
> > + * else
> > + * goto <bb 4>; [50.00%]
> > + *
> > + * <bb 3> [local count: 536870912]:
> > + * _4 = x_2(D) - y_3(D);
> > + *
> > + * <bb 4> [local count: 1073741824]:
> > + * # _1 = PHI <0(2), _4(3)>
> > + * =>
> > + * <bb 4> [local count: 1073741824]:
> > + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > + if (gimple_phi_num_args (phi) != 2)
> > + return;
> > +
> > + tree ops[2];
> > + tree phi_result = gimple_phi_result (phi);
> > +
> > + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> > + ops[0], ops[1]);
> > +}
> > +
> > /* Recognize for unsigned x
> > x = y - z;
> > if (x > y)
> > @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > {
> > gimple_stmt_iterator gsi = gsi_start_bb (bb);
> > match_unsigned_saturation_add (&gsi, psi.phi ());
> > + match_unsigned_saturation_sub (&gsi, psi.phi ());
> > }
> >
> > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> > @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > continue;
> > }
> > match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > case PLUS_EXPR:
> > @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > break;
> >
> > case COND_EXPR:
> > + case BIT_AND_EXPR:
> > match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
2024-06-27 7:31 ` Andrew Pinski
@ 2024-06-27 7:54 ` Li, Pan2
0 siblings, 0 replies; 9+ messages in thread
From: Li, Pan2 @ 2024-06-27 7:54 UTC (permalink / raw)
To: Andrew Pinski
Cc: Richard Biener, gcc-patches, juzhe.zhong, kito.cheng,
jeffreyalaw, rdapp.gcc
Yes, you are right. The supported forms for now don't involve any widening ops.
Thus, below uint64_t form is failed to detect, will add it to my backlog.
uint32_t saturation_add(uint32_t a, uint32_t b)
{
const uint64_t tmp = (uint64_t)a + b;
if (tmp > UINT32_MAX)
{
return UINT32_MAX;
}
return tmp;
}
Pan
-----Original Message-----
From: Andrew Pinski <pinskia@gmail.com>
Sent: Thursday, June 27, 2024 3:32 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
On Wed, Jun 19, 2024 at 12:37 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> Given almost all unsigned SAT_ADD/SAT_SUB patches are merged, I revisit the original code pattern aka zip benchmark.
> It may look like below:
>
> void test (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> {
> unsigned m = 0, n = count;
> register uint16_t *p;
>
> p = x;
>
> do {
> m = *--p;
>
> *p = (uint16_t)(m >= wsize ? m-wsize : 0); // There will be a conversion here.
> } while (--n);
> }
>
> And we can have 179 tree pass as below:
>
> <bb 3> [local count: 1073741824]:
> # n_3 = PHI <n_15(7), count_7(D)(15)>
> # p_4 = PHI <p_10(7), x_8(D)(15)>
> p_10 = p_4 + 18446744073709551614;
> _1 = *p_10;
> m_11 = (unsigned int) _1;
> _2 = m_11 - wsize_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = m_11 >= wsize_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> *p_10 = iftmp.0_5;
>
> The above form doesn't hit any form we have supported in match.pd. Then I have one idea that to convert
>
> uint16 d, tmp;
> uint32 a, b, m;
>
> m = a - b;
> tmp = (uint16)m;
> d = a >= b ? tmp : 0;
>
> to
>
> d = (uint16)(.SAT_SUB (a, b));
>
> I am not very sure it is reasonable to make it work, it may have gimple assignment with convert similar as below (may require the help of vectorize_conversion?).
> Would like to get some hint from you before the next step, thanks a lot.
>
> patt_34 = .SAT_SUB (m_11, wsize_12(D));
> patt_35 = (vector([8,8]) short unsigned int) patt_34;
I am not sure if this is related to the above but we also miss:
```
uint32_t saturation_add(uint32_t a, uint32_t b)
{
const uint64_t tmp = (uint64_t)a + b;
if (tmp > UINT32_MAX)
{
return UINT32_MAX;
}
return tmp;
}
```
This comes from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88603 . I
thought you might be interested in that form too.
Thanks,
Andrew
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 14, 2024 4:05 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
>
> On Wed, Jun 12, 2024 at 2:38 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support the scalar unsigned form 1 and 2, we would like
> > to introduce more forms include the branch and branchless. There
> > are forms 3-10 list as below:
> >
> > Form 3:
> > #define SAT_SUB_U_3(T) \
> > T sat_sub_u_3_##T (T x, T y) \
> > { \
> > return x > y ? x - y : 0; \
> > }
> >
> > Form 4:
> > #define SAT_SUB_U_4(T) \
> > T sat_sub_u_4_##T (T x, T y) \
> > { \
> > return x >= y ? x - y : 0; \
> > }
> >
> > Form 5:
> > #define SAT_SUB_U_5(T) \
> > T sat_sub_u_5_##T (T x, T y) \
> > { \
> > return x < y ? 0 : x - y; \
> > }
> >
> > Form 6:
> > #define SAT_SUB_U_6(T) \
> > T sat_sub_u_6_##T (T x, T y) \
> > { \
> > return x <= y ? 0 : x - y; \
> > }
> >
> > Form 7:
> > #define SAT_SUB_U_7(T) \
> > T sat_sub_u_7_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)(overflow - 1); \
> > }
> >
> > Form 8:
> > #define SAT_SUB_U_8(T) \
> > T sat_sub_u_8_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return ret & (T)-(!overflow); \
> > }
> >
> > Form 9:
> > #define SAT_SUB_U_9(T) \
> > T sat_sub_u_9_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return overflow ? 0 : ret; \
> > }
> >
> > Form 10:
> > #define SAT_SUB_U_10(T) \
> > T sat_sub_u_10_##T (T x, T y) \
> > { \
> > T ret; \
> > T overflow = __builtin_sub_overflow (x, y, &ret); \
> > return !overflow ? ret : 0; \
> > }
> >
> > Take form 10 as example:
> >
> > SAT_SUB_U_10(uint64_t);
> >
> > Before this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > unsigned char _1;
> > unsigned char _2;
> > uint8_t _3;
> > __complex__ unsigned char _6;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
> > _2 = IMAGPART_EXPR <_6>;
> > if (_2 == 0)
> > goto <bb 3>; [50.00%]
> > else
> > goto <bb 4>; [50.00%]
> > ;; succ: 3
> > ;; 4
> >
> > ;; basic block 3, loop depth 0
> > ;; pred: 2
> > _1 = REALPART_EXPR <_6>;
> > ;; succ: 4
> >
> > ;; basic block 4, loop depth 0
> > ;; pred: 2
> > ;; 3
> > # _3 = PHI <0(2), _1(3)>
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > After this patch:
> > uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
> > {
> > uint8_t _3;
> >
> > ;; basic block 2, loop depth 0
> > ;; pred: ENTRY
> > _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> > return _3;
> > ;; succ: EXIT
> >
> > }
> >
> > The below test suites are passed for this patch:
> > 1. The rv64gcv fully regression test with newlib.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap test.
> > 4. The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Add more match for unsigned sat_sub.
> > * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
> > func impl to match phi node for .SAT_SUB.
> > (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
> > for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> > gcc/match.pd | 25 +++++++++++++++++++++++--
> > gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5cfe81e80b3..66e411b3359 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > /* Unsigned saturation sub, case 1 (branch with gt):
> > SAT_U_SUB = X > Y ? X - Y : 0 */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> > SAT_U_SUB = X >= Y ? X - Y : 0. */
> > (match (unsigned_integer_sat_sub @0 @1)
> > - (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (plus:c (imagpart @2) integer_minus_onep))
>
> :c shouldn't be necessary on the plus
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
> > + (bit_xor:c (imagpart @2) integer_onep))
>
> or on the bit_xor
>
> OK with those changes.
>
> Richard.
>
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
> > + (realpart @2) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > /* x > y && x != XXX_MIN --> x > y
> > x > y && x == XXX_MIN --> false . */
> > (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index fbb8e0ea306..05aa157611b 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> > }
> >
> > +/*
> > + * Try to match saturation unsigned sub.
> > + * <bb 2> [local count: 1073741824]:
> > + * if (x_2(D) > y_3(D))
> > + * goto <bb 3>; [50.00%]
> > + * else
> > + * goto <bb 4>; [50.00%]
> > + *
> > + * <bb 3> [local count: 536870912]:
> > + * _4 = x_2(D) - y_3(D);
> > + *
> > + * <bb 4> [local count: 1073741824]:
> > + * # _1 = PHI <0(2), _4(3)>
> > + * =>
> > + * <bb 4> [local count: 1073741824]:
> > + * _1 = .SAT_SUB (x_2(D), y_3(D)); */
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > + if (gimple_phi_num_args (phi) != 2)
> > + return;
> > +
> > + tree ops[2];
> > + tree phi_result = gimple_phi_result (phi);
> > +
> > + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
> > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
> > + ops[0], ops[1]);
> > +}
> > +
> > /* Recognize for unsigned x
> > x = y - z;
> > if (x > y)
> > @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > {
> > gimple_stmt_iterator gsi = gsi_start_bb (bb);
> > match_unsigned_saturation_add (&gsi, psi.phi ());
> > + match_unsigned_saturation_sub (&gsi, psi.phi ());
> > }
> >
> > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> > @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > continue;
> > }
> > match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
> > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > case PLUS_EXPR:
> > @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> > break;
> >
> > case COND_EXPR:
> > + case BIT_AND_EXPR:
> > match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > break;
> >
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-06-27 7:54 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-12 12:37 [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB pan2.li
2024-06-14 8:05 ` Richard Biener
2024-06-14 8:14 ` Li, Pan2
2024-06-14 14:06 ` Li, Pan2
2024-06-19 7:36 ` Li, Pan2
2024-06-19 8:00 ` Richard Biener
2024-06-19 13:47 ` Li, Pan2
2024-06-27 7:31 ` Andrew Pinski
2024-06-27 7:54 ` Li, Pan2
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).