public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
@ 2024-06-21  3:52 pan2.li
  2024-06-21  7:00 ` Richard Biener
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: pan2.li @ 2024-06-21  3:52 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, richard.guenther, jeffreyalaw,
	rdapp.gcc, Pan Li

From: Pan Li <pan2.li@intel.com>

The zip benchmark of coremark-pro have one SAT_SUB like pattern but
truncated as below:

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
    a = *--p;
    *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
  } while (--n);
}

It will have gimple after ifcvt pass,  it cannot hit any pattern of
SAT_SUB and then cannot vectorize to SAT_SUB.

_2 = a_11 - b_12(D);
iftmp.0_13 = (short unsigned int) _2;
_18 = a_11 >= b_12(D);
iftmp.0_5 = _18 ? iftmp.0_13 : 0;

This patch would like to do some reconcile for above pattern to match
the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
the SAT_SUB.

_2 = a_11 - b_12(D);
_18 = a_11 >= b_12(D);
_pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
iftmp.0_13 = (short unsigned int) _pattmp;
iftmp.0_5 = iftmp.0_13;

The below tests are running for this patch.
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

	* match.pd: Add new match for trunated unsigned sat_sub.
	* tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
	New external decl from match.pd.
	(tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
	to reconcile the truncated sat_sub pattern.
	(tree_if_cond_reconcile): New func impl to reconcile.
	(pass_if_conversion::execute): Try to reconcile after ifcvt.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd        |  9 +++++
 gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3d0689c9312..9617a5f9d5e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Unsigned saturation sub and then truncated, aka:
+   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
+ */
+(match (truncated_unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (@0, @1)
+      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 57992b6deca..535743130f2 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
   return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
 }
 
+extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
+						       tree (*)(tree));
+
+/*
+ * Try to reconcile the stmt pattern as below to math the SAT_SUB
+ * in vectorization.  If and only if the related internal_fn has
+ * been implemented already.
+ *
+ * The reconcile will insert one new stmt named 'a' in below example,
+ * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
+ * pattern is able to hit the SAT_SUB pattern in the underlying pass.
+ *
+ * 1. _2 = a_11 - b_12(D);
+ * 2. iftmp.0_13 = (short unsigned int) _2;
+ * 3. _18 = a_11 >= b_12(D);
+ * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
+ * ==>
+ * 1. _2 = a_11 - b_12(D);
+ * 3. _18 = a_11 >= b_12(D);
+ * a. pattmp = _18 ? _2 : 0;                     // New insertion
+ * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
+ * b. iftmp.0_5 = iftmp.0_13;
+ *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
+ */
+static void
+tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
+						 gassign *stmt)
+{
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
+  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
+						     TREE_TYPE (lhs),
+						     OPTIMIZE_FOR_BOTH);
+
+  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
+    {
+      tree cond = gimple_assign_rhs1 (stmt); // aka _18
+      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
+      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
+      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
+      tree raw_type = TREE_TYPE (minus);
+      tree zero = build_zero_cst (raw_type);
+      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
+
+      /* For stmt 'a' in above example  */
+      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
+      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
+      update_stmt (stmt_a);
+
+      /* For stmt '2' in above example  */
+      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
+      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
+      gimple_assign_set_rhs1 (stmt_2, tmp);
+      update_stmt (stmt_2);
+
+      /* For stmt 'b' in above example  */
+      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
+      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
+      update_stmt (stmt_b);
+    }
+}
+
+static void
+tree_if_cond_reconcile (function *fun)
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+
+	  if (is_gimple_assign (stmt))
+	    {
+	      gassign *assign = dyn_cast <gassign *> (stmt);
+	      tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
+	    }
+	}
+    }
+}
 
 /* If-convert LOOP when it is legal.  For the moment this pass has no
    profitability analysis.  Returns non-zero todo flags when something
@@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
 	}
     }
 
+  tree_if_cond_reconcile (fun);
+
   return 0;
 }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2024-06-28 15:06 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-21  3:52 [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB pan2.li
2024-06-21  7:00 ` Richard Biener
2024-06-21  8:50   ` Li, Pan2
2024-06-21  9:28     ` Richard Biener
2024-06-21 14:45       ` Li, Pan2
2024-06-22 13:19         ` Richard Biener
2024-06-23 12:57           ` Li, Pan2
2024-06-24 13:55 ` [PATCH v2] Vect: Support truncate after .SAT_SUB pattern in zip pan2.li
2024-06-24 19:59   ` Tamar Christina
2024-06-25  2:25     ` Li, Pan2
2024-06-25  4:00       ` Tamar Christina
2024-06-25  6:06         ` Li, Pan2
2024-06-25  6:11           ` Tamar Christina
2024-06-25  6:25             ` Li, Pan2
2024-06-26  3:12               ` Li, Pan2
2024-06-26 13:56   ` Richard Biener
2024-06-26 14:22     ` Li, Pan2
2024-06-27  6:48   ` Uros Bizjak
2024-06-27  7:00     ` Li, Pan2
2024-06-27  9:22       ` Uros Bizjak
2024-06-27  1:31 ` [PATCH v3] " pan2.li
2024-06-27  6:04   ` Richard Biener
2024-06-27  6:14     ` Li, Pan2
2024-06-27 14:45       ` Li, Pan2
2024-06-28  5:38         ` Richard Biener
2024-06-28 13:38           ` Tamar Christina
2024-06-28 15:06             ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).