public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB
@ 2024-06-12 12:37 pan2.li
  2024-06-14  8:05 ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: pan2.li @ 2024-06-12 12:37 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, richard.guenther, jeffreyalaw,
	rdapp.gcc, Pan Li

From: Pan Li <pan2.li@intel.com>

Now that the scalar unsigned forms 1 and 2 are supported, we would like
to introduce more forms, including both branch and branchless ones.
Forms 3-10 are listed below:

Form 3:
  #define SAT_SUB_U_3(T) \
  T sat_sub_u_3_##T (T x, T y) \
  { \
    return x > y ? x - y : 0; \
  }

Form 4:
  #define SAT_SUB_U_4(T) \
  T sat_sub_u_4_##T (T x, T y) \
  { \
    return x >= y ? x - y : 0; \
  }

Form 5:
  #define SAT_SUB_U_5(T) \
  T sat_sub_u_5_##T (T x, T y) \
  { \
    return x < y ? 0 : x - y; \
  }

Form 6:
  #define SAT_SUB_U_6(T) \
  T sat_sub_u_6_##T (T x, T y) \
  { \
    return x <= y ? 0 : x - y; \
  }

Form 7:
  #define SAT_SUB_U_7(T) \
  T sat_sub_u_7_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return ret & (T)(overflow - 1); \
  }

Form 8:
  #define SAT_SUB_U_8(T) \
  T sat_sub_u_8_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return ret & (T)-(!overflow); \
  }

Form 9:
  #define SAT_SUB_U_9(T) \
  T sat_sub_u_9_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return overflow ? 0 : ret; \
  }

Form 10:
  #define SAT_SUB_U_10(T) \
  T sat_sub_u_10_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return !overflow ? ret : 0; \
  }

Take form 10 as an example:

SAT_SUB_U_10(uint8_t);

Before this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
  unsigned char _1;
  unsigned char _2;
  uint8_t _3;
  __complex__ unsigned char _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 == 0)
    goto <bb 3>; [50.00%]
  else
    goto <bb 4>; [50.00%]
;;    succ:       3
;;                4

;;   basic block 3, loop depth 0
;;    pred:       2
  _1 = REALPART_EXPR <_6>;
;;    succ:       4

;;   basic block 4, loop depth 0
;;    pred:       2
;;                3
  # _3 = PHI <0(2), _1(3)>
  return _3;
;;    succ:       EXIT

}

After this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
  uint8_t _3;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _3;
;;    succ:       EXIT

}

The following test suites pass with this patch:
1. The rv64gcv full regression test with newlib.
2. The rv64gcv build with glibc.
3. The x86 bootstrap test.
4. The x86 full regression test.

gcc/ChangeLog:

	* match.pd: Add more match for unsigned sat_sub.
	* tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
	func impl to match phi node for .SAT_SUB.
	(math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
	for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd              | 25 +++++++++++++++++++++++--
 gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5cfe81e80b3..66e411b3359 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned saturation sub, case 1 (branch with gt):
    SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
- (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
 /* Unsigned saturation sub, case 2 (branch with ge):
    SAT_U_SUB = X >= Y ? X - Y : 0.  */
 (match (unsigned_integer_sat_sub @0 @1)
- (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
@@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+  (plus:c (imagpart @2) integer_minus_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+  (bit_xor:c (imagpart @2) integer_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
+  (realpart @2) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index fbb8e0ea306..05aa157611b 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
     build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
 }
 
+/*
+ * Try to match saturation unsigned sub.
+ *  <bb 2> [local count: 1073741824]:
+ *  if (x_2(D) > y_3(D))
+ *    goto <bb 3>; [50.00%]
+ *  else
+ *    goto <bb 4>; [50.00%]
+ *
+ *  <bb 3> [local count: 536870912]:
+ *  _4 = x_2(D) - y_3(D);
+ *
+ *  <bb 4> [local count: 1073741824]:
+ *  # _1 = PHI <0(2), _4(3)>
+ *  =>
+ *  <bb 4> [local count: 1073741824]:
+ *  _1 = .SAT_SUB (x_2(D), y_3(D));  */
+static void
+match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
+{
+  if (gimple_phi_num_args (phi) != 2)
+    return;
+
+  tree ops[2];
+  tree phi_result = gimple_phi_result (phi);
+
+  if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
+    build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
+					ops[0], ops[1]);
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
     {
       gimple_stmt_iterator gsi = gsi_start_bb (bb);
       match_unsigned_saturation_add (&gsi, psi.phi ());
+      match_unsigned_saturation_sub (&gsi, psi.phi ());
     }
 
   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
@@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 		  continue;
 		}
 	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
+	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
 	      break;
 
 	    case PLUS_EXPR:
@@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case COND_EXPR:
+	    case BIT_AND_EXPR:
 	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
 	      break;
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-06-27  7:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-12 12:37 [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB pan2.li
2024-06-14  8:05 ` Richard Biener
2024-06-14  8:14   ` Li, Pan2
2024-06-14 14:06     ` Li, Pan2
2024-06-19  7:36   ` Li, Pan2
2024-06-19  8:00     ` Richard Biener
2024-06-19 13:47       ` Li, Pan2
2024-06-27  7:31     ` Andrew Pinski
2024-06-27  7:54       ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).