* [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD
@ 2024-05-27 6:29 pan2.li
2024-05-29 12:35 ` Richard Biener
0 siblings, 1 reply; 3+ messages in thread
From: pan2.li @ 2024-05-27 6:29 UTC (permalink / raw)
To: gcc-patches
Cc: juzhe.zhong, kito.cheng, tamar.christina, richard.guenther, Pan Li
From: Pan Li <pan2.li@intel.com>
After we support one gassign form of the unsigned .SAT_ADD, we
would like to support more forms including both the branch and
branchless. There are 5 other forms of .SAT_ADD, list as below:
Form 1:
#define SAT_ADD_U_1(T) \
T sat_add_u_1_##T(T x, T y) \
{ \
return (T)(x + y) >= x ? (x + y) : -1; \
}
Form 2:
#define SAT_ADD_U_2(T) \
T sat_add_u_2_##T(T x, T y) \
{ \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
}
Form 3:
#define SAT_ADD_U_3(T) \
T sat_add_u_3_##T (T x, T y) \
{ \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
}
Form 4:
#define SAT_ADD_U_4(T) \
T sat_add_u_4_##T (T x, T y) \
{ \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
}
Form 5:
#define SAT_ADD_U_5(T) \
T sat_add_u_5_##T(T x, T y) \
{ \
return (T)(x + y) < x ? -1 : (x + y); \
}
Take the forms 3 of above as example:
uint64_t
sat_add (uint64_t x, uint64_t y)
{
uint64_t ret;
return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}
Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
long unsigned int _1;
long unsigned int _2;
uint64_t _3;
__complex__ long unsigned int _6;
;; basic block 2, loop depth 0
;; pred: ENTRY
_6 = .ADD_OVERFLOW (x_4(D), y_5(D));
_2 = IMAGPART_EXPR <_6>;
if (_2 != 0)
goto <bb 4>; [35.00%]
else
goto <bb 3>; [65.00%]
;; succ: 4
;; 3
;; basic block 3, loop depth 0
;; pred: 2
_1 = REALPART_EXPR <_6>;
;; succ: 4
;; basic block 4, loop depth 0
;; pred: 3
;; 2
# _3 = PHI <_1(3), 18446744073709551615(2)>
return _3;
;; succ: EXIT
}
After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
long unsigned int _12;
;; basic block 2, loop depth 0
;; pred: ENTRY
_12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
return _12;
;; succ: EXIT
}
The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.
gcc/ChangeLog:
* genmatch.cc (dt_node::gen_kids): Add new arg of predicate id.
(allow_phi_predicate_p): New func impl to check the phi
predicate is allowed or not.
(dt_node::gen_kids_1): Add COND_EXPR gen for phi node if allowed.
(dt_operand::gen_phi_on_cond):
(write_predicate): Init the predicate id before gen_kids.
* match.pd: Add more forms of unsigned_integer_sat_add and
comments.
* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
(match_assign_saturation_arith): Rename to.
(match_phi_saturation_arith): New func impl to match phi.
(math_opts_dom_walker::after_dom_children): Add phi match for
echo bb.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/genmatch.cc | 123 ++++++++++++++++++++++++++++++++++++--
gcc/match.pd | 43 ++++++++++++-
gcc/tree-ssa-math-opts.cc | 51 +++++++++++++++-
3 files changed, 210 insertions(+), 7 deletions(-)
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f1e0e7abe0c..816d2dafd23 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1767,6 +1767,7 @@ public:
unsigned level;
dt_node *parent;
vec<dt_node *> kids;
+ const char *id;
/* Statistics. */
unsigned num_leafs;
@@ -1786,7 +1787,7 @@ public:
virtual void gen (FILE *, int, bool, int) {}
void gen_kids (FILE *, int, bool, int);
- void gen_kids_1 (FILE *, int, bool, int,
+ void gen_kids_1 (FILE *, const char *, int, bool, int,
const vec<dt_operand *> &, const vec<dt_operand *> &,
const vec<dt_operand *> &, const vec<dt_operand *> &,
const vec<dt_operand *> &, const vec<dt_node *> &);
@@ -1819,6 +1820,7 @@ public:
char *get_name (char *);
void gen_opname (char *, unsigned);
+ void gen_phi_on_cond (FILE *, int, bool, int);
};
/* Leaf node of the decision tree, used for DT_SIMPLIFY. */
@@ -3173,7 +3175,7 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
for what we have collected sofar. */
fns.qsort (fns_cmp);
generic_fns.qsort (fns_cmp);
- gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+ gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
fns, generic_fns, preds, others);
/* And output the true operand itself. */
kids[i]->gen (f, indent, gimple, depth);
@@ -3191,14 +3193,21 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
/* Generate code for the remains. */
fns.qsort (fns_cmp);
generic_fns.qsort (fns_cmp);
- gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+ gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
fns, generic_fns, preds, others);
}
+static bool
+allow_phi_predicate_p (const char *id)
+{
+ return id && strcmp (id, "unsigned_integer_sat_add") == 0;
+}
+
/* Generate matching code for the children of the decision tree node. */
void
-dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
+dt_node::gen_kids_1 (FILE *f, const char *id, int indent, bool gimple,
+ int depth,
const vec<dt_operand *> &gimple_exprs,
const vec<dt_operand *> &generic_exprs,
const vec<dt_operand *> &fns,
@@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
depth);
indent += 4;
fprintf_indent (f, indent, "{\n");
+ bool cond_expr_p = false;
for (unsigned i = 0; i < exprs_len; ++i)
{
expr *e = as_a <expr *> (gimple_exprs[i]->op);
@@ -3262,6 +3272,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
else
{
id_base *op = e->operation;
+ cond_expr_p |= *op == COND_EXPR;
if (*op == CONVERT_EXPR || *op == NOP_EXPR)
fprintf_indent (f, indent, "CASE_CONVERT:\n");
else
@@ -3275,6 +3286,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
fprintf_indent (f, indent, "default:;\n");
fprintf_indent (f, indent, "}\n");
indent -= 4;
+
+ if (cond_expr_p && allow_phi_predicate_p (id))
+ {
+ fprintf_indent (f, indent,
+ "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
+ depth, depth);
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ for (unsigned i = 0; i < exprs_len; i++)
+ {
+ expr *e = as_a <expr *> (gimple_exprs[i]->op);
+ if (*e->operation == COND_EXPR)
+ gimple_exprs[i]->gen_phi_on_cond (f, indent, true, depth);
+ }
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+ }
}
if (fns_len)
@@ -3483,6 +3515,87 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
}
}
+/* Generate matching code for the phi when meet COND_EXPR. */
+
+void
+dt_operand::gen_phi_on_cond (FILE *f, int indent, bool gimple, int depth)
+{
+ fprintf_indent (f, indent,
+ "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+
+ fprintf_indent (f, indent,
+ "if (EDGE_COUNT (_b%d->preds) == 2 && gimple_phi_num_args (_a%d) == 2)\n",
+ depth, depth);
+
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ fprintf_indent (f, indent,
+ "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
+ "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
+ "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
+ depth, depth, depth, depth);
+
+ fprintf_indent (f, indent,
+ "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
+ depth, depth);
+ fprintf_indent (f, indent, "if (_ct_%d "
+ "&& EDGE_COUNT (_other_db_%d->preds) == 1 "
+ "&& EDGE_COUNT (_other_db_%d->succs) == 1 "
+ "&& EDGE_SUCC (_other_db_%d, 0)->dest == _b%d)\n",
+ depth, depth, depth, depth, depth);
+
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ fprintf_indent (f, indent,
+ "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+ fprintf_indent (f, indent,
+ "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
+
+ char opname_0[20];
+ char opname_1[20];
+ char opname_2[20];
+ gen_opname (opname_0, 0);
+
+ fprintf_indent (f, indent,
+ "tree %s = build2 (gimple_cond_code (_ct_%d), "
+ "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
+ opname_0, depth, depth, depth);
+
+ fprintf_indent (f, indent,
+ "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
+ " & EDGE_TRUE_VALUE;\n", depth, depth);
+
+ gen_opname (opname_1, 1);
+ fprintf_indent (f, indent,
+ "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
+ opname_1, depth, depth);
+
+ gen_opname (opname_2, 2);
+ fprintf_indent (f, indent,
+ "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
+ opname_2, depth, depth);
+
+ gen_kids (f, indent, gimple, depth);
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+}
+
/* Emit a logging call to the debug file to the file F, with the INDENT from
either the RESULT location or the S's match location if RESULT is null. */
static void
@@ -4263,6 +4376,8 @@ write_predicate (FILE *f, predicate_id *p, decision_tree &dt, bool gimple)
if (!gimple)
fprintf_indent (f, 2, "if (TREE_SIDE_EFFECTS (t)) return false;\n");
+
+ dt.root->id = p->id;
dt.root->gen_kids (f, 2, gimple, 0);
fprintf_indent (f, 2, "return false;\n"
diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..ddbdb7f7638 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3046,31 +3046,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
/* Unsigned Saturation Add */
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -((X + Y) < X) */
(match (usadd_left_part_1 @0 @1)
(plus:c @0 @1)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (usadd_left_part_2 @0 @1)
(realpart (IFN_ADD_OVERFLOW:c @0 @1))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -((type)(X + Y) < X) */
(match (usadd_right_part_1 @0 @1)
(negate (convert (lt (plus:c @0 @1) @0)))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -(X > (X + Y)) */
(match (usadd_right_part_1 @0 @1)
(negate (convert (gt @0 (plus:c @0 @1))))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (usadd_right_part_2 @0 @1)
(negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
+(match (usadd_right_part_2 @0 @1)
+ (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
@@ -3082,10 +3099,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(match (unsigned_integer_sat_add @0 @1)
(bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
-/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (unsigned_integer_sat_add @0 @1)
(bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
+/* Unsigned saturation add, case 3 (branch with ge):
+ SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
+
+/* Unsigned saturation add, case 4 (branch with lt):
+ SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
+
+/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
+ SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+ (usadd_left_part_2 @0 @1) integer_minus_onep))
+
+/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
+ SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+ integer_minus_onep (usadd_left_part_2 @0 @1)))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..8624d414347 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
* =>
* _12 = .SAT_ADD (_4, _6); */
static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
{
gcall *call = NULL;
@@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
}
}
+/*
+ * Try to match saturation arith pattern(s).
+ * From:
+ * <bb 2> :
+ * _1 = x_3(D) + y_4(D);
+ * if (_1 >= x_3(D))
+ * goto <bb 3>; [INV]
+ * else
+ * goto <bb 4>; [INV]
+ *
+ * <bb 3> :
+ *
+ * <bb 4> :
+ * # _2 = PHI <255(2), _1(3)>
+ *
+ * To:
+ * <bb 4> [local count: 1073741824]:
+ * _2 = .SAT_ADD (x_4(D), y_5(D)); */
+
+static void
+match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
+{
+ if (gimple_phi_num_args (phi) != 2)
+ return;
+
+ tree ops[2];
+ tree phi_result = gimple_phi_result (phi);
+
+ if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+ && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
+ OPTIMIZE_FOR_BOTH))
+ {
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+ gimple_call_set_lhs (call, phi_result);
+ gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+ gimple_stmt_iterator psi = gsi_for_stmt (phi);
+ remove_phi_node (&psi, false);
+ }
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
+ for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ match_phi_saturation_arith (&gsi, psi.phi ());
+ }
+
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
@@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
break;
case BIT_IOR_EXPR:
- match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+ match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
/* fall-through */
case BIT_XOR_EXPR:
match_uaddc_usubc (&gsi, stmt, code);
--
2.34.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD
2024-05-27 6:29 [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD pan2.li
@ 2024-05-29 12:35 ` Richard Biener
2024-05-29 14:00 ` Li, Pan2
0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2024-05-29 12:35 UTC (permalink / raw)
To: pan2.li; +Cc: gcc-patches, juzhe.zhong, kito.cheng, tamar.christina
On Mon, May 27, 2024 at 8:29 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support one gassign form of the unsigned .SAT_ADD, we
> would like to support more forms including both the branch and
> branchless. There are 5 other forms of .SAT_ADD, list as below:
>
> Form 1:
> #define SAT_ADD_U_1(T) \
> T sat_add_u_1_##T(T x, T y) \
> { \
> return (T)(x + y) >= x ? (x + y) : -1; \
> }
>
> Form 2:
> #define SAT_ADD_U_2(T) \
> T sat_add_u_2_##T(T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_add_overflow (x, y, &ret); \
> return (T)(-overflow) | ret; \
> }
>
> Form 3:
> #define SAT_ADD_U_3(T) \
> T sat_add_u_3_##T (T x, T y) \
> { \
> T ret; \
> return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
> }
>
> Form 4:
> #define SAT_ADD_U_4(T) \
> T sat_add_u_4_##T (T x, T y) \
> { \
> T ret; \
> return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
> }
>
> Form 5:
> #define SAT_ADD_U_5(T) \
> T sat_add_u_5_##T(T x, T y) \
> { \
> return (T)(x + y) < x ? -1 : (x + y); \
> }
>
> Take the forms 3 of above as example:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
> uint64_t ret;
> return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Before this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
> long unsigned int _1;
> long unsigned int _2;
> uint64_t _3;
> __complex__ long unsigned int _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 != 0)
> goto <bb 4>; [35.00%]
> else
> goto <bb 3>; [65.00%]
> ;; succ: 4
> ;; 3
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 3
> ;; 2
> # _3 = PHI <_1(3), 18446744073709551615(2)>
> return _3;
> ;; succ: EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
> long unsigned int _12;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
> return _12;
> ;; succ: EXIT
> }
>
> The below test suites are still running, will update it later.
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
> * genmatch.cc (dt_node::gen_kids): Add new arg of predicate id.
> (allow_phi_predicate_p): New func impl to check the phi
> predicate is allowed or not.
> (dt_node::gen_kids_1): Add COND_EXPR gen for phi node if allowed.
> (dt_operand::gen_phi_on_cond):
> (write_predicate): Init the predicate id before gen_kids.
> * match.pd: Add more forms of unsigned_integer_sat_add and
> comments.
> * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
> (match_assign_saturation_arith): Rename to.
> (match_phi_saturation_arith): New func impl to match phi.
> (math_opts_dom_walker::after_dom_children): Add phi match for
> echo bb.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/genmatch.cc | 123 ++++++++++++++++++++++++++++++++++++--
> gcc/match.pd | 43 ++++++++++++-
> gcc/tree-ssa-math-opts.cc | 51 +++++++++++++++-
> 3 files changed, 210 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index f1e0e7abe0c..816d2dafd23 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -1767,6 +1767,7 @@ public:
> unsigned level;
> dt_node *parent;
> vec<dt_node *> kids;
> + const char *id;
>
> /* Statistics. */
> unsigned num_leafs;
> @@ -1786,7 +1787,7 @@ public:
> virtual void gen (FILE *, int, bool, int) {}
>
> void gen_kids (FILE *, int, bool, int);
> - void gen_kids_1 (FILE *, int, bool, int,
> + void gen_kids_1 (FILE *, const char *, int, bool, int,
> const vec<dt_operand *> &, const vec<dt_operand *> &,
> const vec<dt_operand *> &, const vec<dt_operand *> &,
> const vec<dt_operand *> &, const vec<dt_node *> &);
> @@ -1819,6 +1820,7 @@ public:
>
> char *get_name (char *);
> void gen_opname (char *, unsigned);
> + void gen_phi_on_cond (FILE *, int, bool, int);
> };
>
> /* Leaf node of the decision tree, used for DT_SIMPLIFY. */
> @@ -3173,7 +3175,7 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
> for what we have collected sofar. */
> fns.qsort (fns_cmp);
> generic_fns.qsort (fns_cmp);
> - gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
> + gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
> fns, generic_fns, preds, others);
> /* And output the true operand itself. */
> kids[i]->gen (f, indent, gimple, depth);
> @@ -3191,14 +3193,21 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
> /* Generate code for the remains. */
> fns.qsort (fns_cmp);
> generic_fns.qsort (fns_cmp);
> - gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
> + gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
> fns, generic_fns, preds, others);
> }
>
> +static bool
> +allow_phi_predicate_p (const char *id)
> +{
> + return id && strcmp (id, "unsigned_integer_sat_add") == 0;
Of course that's not OK. I suggest to either check ...
> +}
> +
> /* Generate matching code for the children of the decision tree node. */
>
> void
> -dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> +dt_node::gen_kids_1 (FILE *f, const char *id, int indent, bool gimple,
> + int depth,
> const vec<dt_operand *> &gimple_exprs,
> const vec<dt_operand *> &generic_exprs,
> const vec<dt_operand *> &fns,
> @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> depth);
> indent += 4;
> fprintf_indent (f, indent, "{\n");
> + bool cond_expr_p = false;
> for (unsigned i = 0; i < exprs_len; ++i)
> {
> expr *e = as_a <expr *> (gimple_exprs[i]->op);
> @@ -3262,6 +3272,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> else
> {
> id_base *op = e->operation;
> + cond_expr_p |= *op == COND_EXPR;
> if (*op == CONVERT_EXPR || *op == NOP_EXPR)
> fprintf_indent (f, indent, "CASE_CONVERT:\n");
> else
> @@ -3275,6 +3286,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> fprintf_indent (f, indent, "default:;\n");
> fprintf_indent (f, indent, "}\n");
> indent -= 4;
> +
> + if (cond_expr_p && allow_phi_predicate_p (id))
... this->parent == nullptr restricting PHI handling to the outermost
COND_EXPR, which
is probably a good idea (but there's no way to see we're in a match or
in a simplify).
Or even better, either mark the expression node with a new flag (make
sure to not
merge cond with/without the flag during decision tree insert), add a new
operation code like (cond_or_phi ...) and either handle or lower it,
with lowering
introducing yet another operation, (phi ..). From all of the above simply doing
this->parent == nullptr and adding a global flag "writing_predicate"
to make sure
to only cover (match ...) is the simplest. I somewhat dislike adding
fake operation
codes so a flag, like '^' (cond^ ...), would be my second choice where I'd limit
that to (match and the outermost cond within that unless we see need for more
cases. There are a few existing (match (cond ..)) patterns so the
flag looks like
the safest option - not all users might expect PHIs.
> + {
> + fprintf_indent (f, indent,
> + "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> + depth, depth);
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
> +
> + for (unsigned i = 0; i < exprs_len; i++)
> + {
> + expr *e = as_a <expr *> (gimple_exprs[i]->op);
> + if (*e->operation == COND_EXPR)
> + gimple_exprs[i]->gen_phi_on_cond (f, indent, true, depth);
> + }
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> + }
> }
>
> if (fns_len)
> @@ -3483,6 +3515,87 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
> }
> }
>
> +/* Generate matching code for the phi when meet COND_EXPR. */
> +
> +void
> +dt_operand::gen_phi_on_cond (FILE *f, int indent, bool gimple, int depth)
Note this is implicitly 'gimple', no need to pass that flag.
> +{
> + fprintf_indent (f, indent,
> + "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> +
> + fprintf_indent (f, indent,
> + "if (EDGE_COUNT (_b%d->preds) == 2 && gimple_phi_num_args (_a%d) == 2)\n",
> + depth, depth);
Either is redundant - the number of preds always matches the number of PHI args.
> +
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
> +
> + fprintf_indent (f, indent,
> + "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> + "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> + "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> + depth, depth, depth, depth);
> +
> + fprintf_indent (f, indent,
> + "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> + depth, depth);
> + fprintf_indent (f, indent, "if (_ct_%d "
> + "&& EDGE_COUNT (_other_db_%d->preds) == 1 "
> + "&& EDGE_COUNT (_other_db_%d->succs) == 1 "
> + "&& EDGE_SUCC (_other_db_%d, 0)->dest == _b%d)\n",
> + depth, depth, depth, depth, depth);
> +
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
I'm quoting generated code here:
else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
{
basic_block _b1 = gimple_bb (_a1);
if (EDGE_COUNT (_b1->preds) == 2 && gimple_phi_num_args
(_a1) == 2)
{
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *>
(*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *>
(*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1;
_other_db_1 = _db_1 == _pb_0_1 : _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 &&
EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_SUCC (_other_db_1,
0)->dest == _b1)
new lines with indenting && EDGE_COUNT checks please.
I think EDGE_SUCC (_other_db_1, 0)->dest == _b1 is pointless. Instead
you want to check EDGE_PRED (_other_db_1, 0)->src == _db_1?
{
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1),
boolean_type_node, _cond_lhs_1, _cond_rhs_1);
bool _arg_0_is_true_1 = gimple_phi_arg_edge
(_a1, 0)->flags & EDGE_TRUE_VALUE;
Hmm, you don't know this PHI arg edge comes from the cond block, it could come
from the middle-bb.
Otherwise I think this will work, of course it depends on us handling
GENERIC conditions in COND_EXPRs. Not building the _p0 GENERIC tree
would be nice but that's quite difficult to pull off here (impossible even).
Overall I like it. Can you please try making the '^' flag work
instead (also document
it in doc/match-and-simplify.texi)?
Thanks for working on this.
Richard.
tree _p1 = gimple_phi_arg_def (_a1,
_arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1,
_arg_0_is_true_1 ? 1 : 0);
> + fprintf_indent (f, indent,
> + "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> + fprintf_indent (f, indent,
> + "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> +
> + char opname_0[20];
> + char opname_1[20];
> + char opname_2[20];
> + gen_opname (opname_0, 0);
> +
> + fprintf_indent (f, indent,
> + "tree %s = build2 (gimple_cond_code (_ct_%d), "
> + "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> + opname_0, depth, depth, depth);
> +
> + fprintf_indent (f, indent,
> + "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> + " & EDGE_TRUE_VALUE;\n", depth, depth);
> +
> + gen_opname (opname_1, 1);
> + fprintf_indent (f, indent,
> + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> + opname_1, depth, depth);
> +
> + gen_opname (opname_2, 2);
> + fprintf_indent (f, indent,
> + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> + opname_2, depth, depth);
> +
> + gen_kids (f, indent, gimple, depth);
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> +}
> +
> /* Emit a logging call to the debug file to the file F, with the INDENT from
> either the RESULT location or the S's match location if RESULT is null. */
> static void
> @@ -4263,6 +4376,8 @@ write_predicate (FILE *f, predicate_id *p, decision_tree &dt, bool gimple)
>
> if (!gimple)
> fprintf_indent (f, 2, "if (TREE_SIDE_EFFECTS (t)) return false;\n");
> +
> + dt.root->id = p->id;
> dt.root->gen_kids (f, 2, gimple, 0);
>
> fprintf_indent (f, 2, "return false;\n"
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 024e3350465..ddbdb7f7638 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3046,31 +3046,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
> /* Unsigned Saturation Add */
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -((X + Y) < X) */
> (match (usadd_left_part_1 @0 @1)
> (plus:c @0 @1)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (usadd_left_part_2 @0 @1)
> (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -((type)(X + Y) < X) */
> (match (usadd_right_part_1 @0 @1)
> (negate (convert (lt (plus:c @0 @1) @0)))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -(X > (X + Y)) */
> (match (usadd_right_part_1 @0 @1)
> (negate (convert (gt @0 (plus:c @0 @1))))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (usadd_right_part_2 @0 @1)
> (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> +(match (usadd_right_part_2 @0 @1)
> + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> because the sub part of left_part_2 cannot work with right_part_1.
> For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> @@ -3082,10 +3099,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (match (unsigned_integer_sat_add @0 @1)
> (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
>
> -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (unsigned_integer_sat_add @0 @1)
> (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation add, case 3 (branch with ge):
> + SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> +
> +/* Unsigned saturation add, case 4 (branch with lt):
> + SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +
> +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> + (usadd_left_part_2 @0 @1) integer_minus_onep))
> +
> +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> + integer_minus_onep (usadd_left_part_2 @0 @1)))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..8624d414347 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> * =>
> * _12 = .SAT_ADD (_4, _6); */
> static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> {
> gcall *call = NULL;
>
> @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> }
> }
>
> +/*
> + * Try to match saturation arith pattern(s).
> + * From:
> + * <bb 2> :
> + * _1 = x_3(D) + y_4(D);
> + * if (_1 >= x_3(D))
> + * goto <bb 3>; [INV]
> + * else
> + * goto <bb 4>; [INV]
> + *
> + * <bb 3> :
> + *
> + * <bb 4> :
> + * # _2 = PHI <255(2), _1(3)>
> + *
> + * To:
> + * <bb 4> [local count: 1073741824]:
> + * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> +
> +static void
> +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> + && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> + OPTIMIZE_FOR_BOTH))
> + {
> + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> + gimple_call_set_lhs (call, phi_result);
> + gsi_insert_before (gsi, call, GSI_SAME_STMT);
> +
> + gimple_stmt_iterator psi = gsi_for_stmt (phi);
> + remove_phi_node (&psi, false);
> + }
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
> fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> + for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> + {
> + gimple_stmt_iterator gsi = gsi_last_bb (bb);
> + match_phi_saturation_arith (&gsi, psi.phi ());
> + }
> +
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> {
> gimple *stmt = gsi_stmt (gsi);
> @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case BIT_IOR_EXPR:
> - match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> + match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
> /* fall-through */
> case BIT_XOR_EXPR:
> match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD
2024-05-29 12:35 ` Richard Biener
@ 2024-05-29 14:00 ` Li, Pan2
0 siblings, 0 replies; 3+ messages in thread
From: Li, Pan2 @ 2024-05-29 14:00 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, juzhe.zhong, kito.cheng, tamar.christina
Thanks Richard for suggestion and review.
Did some tricky/ugly restrictions v3 for the phi gen as there are
sorts of (cond in match.pd, will have a try with your proposal in v4.
Thanks again for help.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Wednesday, May 29, 2024 8:36 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD
On Mon, May 27, 2024 at 8:29 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support one gassign form of the unsigned .SAT_ADD, we
> would like to support more forms including both the branch and
> branchless. There are 5 other forms of .SAT_ADD, list as below:
>
> Form 1:
> #define SAT_ADD_U_1(T) \
> T sat_add_u_1_##T(T x, T y) \
> { \
> return (T)(x + y) >= x ? (x + y) : -1; \
> }
>
> Form 2:
> #define SAT_ADD_U_2(T) \
> T sat_add_u_2_##T(T x, T y) \
> { \
> T ret; \
> T overflow = __builtin_add_overflow (x, y, &ret); \
> return (T)(-overflow) | ret; \
> }
>
> Form 3:
> #define SAT_ADD_U_3(T) \
> T sat_add_u_3_##T (T x, T y) \
> { \
> T ret; \
> return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
> }
>
> Form 4:
> #define SAT_ADD_U_4(T) \
> T sat_add_u_4_##T (T x, T y) \
> { \
> T ret; \
> return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
> }
>
> Form 5:
> #define SAT_ADD_U_5(T) \
> T sat_add_u_5_##T(T x, T y) \
> { \
> return (T)(x + y) < x ? -1 : (x + y); \
> }
>
> Take the forms 3 of above as example:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
> uint64_t ret;
> return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Before this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
> long unsigned int _1;
> long unsigned int _2;
> uint64_t _3;
> __complex__ long unsigned int _6;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
> _2 = IMAGPART_EXPR <_6>;
> if (_2 != 0)
> goto <bb 4>; [35.00%]
> else
> goto <bb 3>; [65.00%]
> ;; succ: 4
> ;; 3
>
> ;; basic block 3, loop depth 0
> ;; pred: 2
> _1 = REALPART_EXPR <_6>;
> ;; succ: 4
>
> ;; basic block 4, loop depth 0
> ;; pred: 3
> ;; 2
> # _3 = PHI <_1(3), 18446744073709551615(2)>
> return _3;
> ;; succ: EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
> long unsigned int _12;
>
> ;; basic block 2, loop depth 0
> ;; pred: ENTRY
> _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
> return _12;
> ;; succ: EXIT
> }
>
> The below test suites are still running, will update it later.
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
> * genmatch.cc (dt_node::gen_kids): Add new arg of predicate id.
> (allow_phi_predicate_p): New func impl to check the phi
> predicate is allowed or not.
> (dt_node::gen_kids_1): Add COND_EXPR gen for phi node if allowed.
> (dt_operand::gen_phi_on_cond):
> (write_predicate): Init the predicate id before gen_kids.
> * match.pd: Add more forms of unsigned_integer_sat_add and
> comments.
> * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
> (match_assign_saturation_arith): Rename to.
> (match_phi_saturation_arith): New func impl to match phi.
> (math_opts_dom_walker::after_dom_children): Add phi match for
> echo bb.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/genmatch.cc | 123 ++++++++++++++++++++++++++++++++++++--
> gcc/match.pd | 43 ++++++++++++-
> gcc/tree-ssa-math-opts.cc | 51 +++++++++++++++-
> 3 files changed, 210 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index f1e0e7abe0c..816d2dafd23 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -1767,6 +1767,7 @@ public:
> unsigned level;
> dt_node *parent;
> vec<dt_node *> kids;
> + const char *id;
>
> /* Statistics. */
> unsigned num_leafs;
> @@ -1786,7 +1787,7 @@ public:
> virtual void gen (FILE *, int, bool, int) {}
>
> void gen_kids (FILE *, int, bool, int);
> - void gen_kids_1 (FILE *, int, bool, int,
> + void gen_kids_1 (FILE *, const char *, int, bool, int,
> const vec<dt_operand *> &, const vec<dt_operand *> &,
> const vec<dt_operand *> &, const vec<dt_operand *> &,
> const vec<dt_operand *> &, const vec<dt_node *> &);
> @@ -1819,6 +1820,7 @@ public:
>
> char *get_name (char *);
> void gen_opname (char *, unsigned);
> + void gen_phi_on_cond (FILE *, int, bool, int);
> };
>
> /* Leaf node of the decision tree, used for DT_SIMPLIFY. */
> @@ -3173,7 +3175,7 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
> for what we have collected sofar. */
> fns.qsort (fns_cmp);
> generic_fns.qsort (fns_cmp);
> - gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
> + gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
> fns, generic_fns, preds, others);
> /* And output the true operand itself. */
> kids[i]->gen (f, indent, gimple, depth);
> @@ -3191,14 +3193,21 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
> /* Generate code for the remains. */
> fns.qsort (fns_cmp);
> generic_fns.qsort (fns_cmp);
> - gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
> + gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
> fns, generic_fns, preds, others);
> }
>
> +static bool
> +allow_phi_predicate_p (const char *id)
> +{
> + return id && strcmp (id, "unsigned_integer_sat_add") == 0;
Of course that's not OK. I suggest to either check ...
> +}
> +
> /* Generate matching code for the children of the decision tree node. */
>
> void
> -dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> +dt_node::gen_kids_1 (FILE *f, const char *id, int indent, bool gimple,
> + int depth,
> const vec<dt_operand *> &gimple_exprs,
> const vec<dt_operand *> &generic_exprs,
> const vec<dt_operand *> &fns,
> @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> depth);
> indent += 4;
> fprintf_indent (f, indent, "{\n");
> + bool cond_expr_p = false;
> for (unsigned i = 0; i < exprs_len; ++i)
> {
> expr *e = as_a <expr *> (gimple_exprs[i]->op);
> @@ -3262,6 +3272,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> else
> {
> id_base *op = e->operation;
> + cond_expr_p |= *op == COND_EXPR;
> if (*op == CONVERT_EXPR || *op == NOP_EXPR)
> fprintf_indent (f, indent, "CASE_CONVERT:\n");
> else
> @@ -3275,6 +3286,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> fprintf_indent (f, indent, "default:;\n");
> fprintf_indent (f, indent, "}\n");
> indent -= 4;
> +
> + if (cond_expr_p && allow_phi_predicate_p (id))
... this->parent == nullptr restricting PHI handling to the outermost
COND_EXPR, which
is probably a good idea (but there's no way to see we're in a match or
in a simplify).
Or even better, either mark the expression node with a new flag (make
sure to not
merge cond with/without the flag during decision tree insert), add a new
operation code like (cond_or_phi ...) and either handle or lower it,
with lowering
introducing yet another operation, (phi ..). From all of the above simply doing
this->parent == nullptr and adding a global flag "writing_predicate"
to make sure
to only cover (match ...) is the simplest. I somewhat dislike adding
fake operation
codes so a flag, like '^' (cond^ ...), would be my second choice where I'd limit
that to (match and the outermost cond within that unless we see need for more
cases. There are a few existing (match (cond ..)) patterns so the
flag looks like
the safest option - not all users might expect PHIs.
> + {
> + fprintf_indent (f, indent,
> + "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> + depth, depth);
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
> +
> + for (unsigned i = 0; i < exprs_len; i++)
> + {
> + expr *e = as_a <expr *> (gimple_exprs[i]->op);
> + if (*e->operation == COND_EXPR)
> + gimple_exprs[i]->gen_phi_on_cond (f, indent, true, depth);
> + }
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> + }
> }
>
> if (fns_len)
> @@ -3483,6 +3515,87 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
> }
> }
>
> +/* Generate matching code for the phi when meet COND_EXPR. */
> +
> +void
> +dt_operand::gen_phi_on_cond (FILE *f, int indent, bool gimple, int depth)
Note this is implicitly 'gimple', no need to pass that flag.
> +{
> + fprintf_indent (f, indent,
> + "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> +
> + fprintf_indent (f, indent,
> + "if (EDGE_COUNT (_b%d->preds) == 2 && gimple_phi_num_args (_a%d) == 2)\n",
> + depth, depth);
Either is redundant - the number of preds always matches the number of PHI args.
> +
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
> +
> + fprintf_indent (f, indent,
> + "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> + "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> + fprintf_indent (f, indent,
> + "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> + "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> + depth, depth, depth, depth);
> +
> + fprintf_indent (f, indent,
> + "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> + depth, depth);
> + fprintf_indent (f, indent, "if (_ct_%d "
> + "&& EDGE_COUNT (_other_db_%d->preds) == 1 "
> + "&& EDGE_COUNT (_other_db_%d->succs) == 1 "
> + "&& EDGE_SUCC (_other_db_%d, 0)->dest == _b%d)\n",
> + depth, depth, depth, depth, depth);
> +
> + indent += 2;
> + fprintf_indent (f, indent, "{\n");
> + indent += 2;
I'm quoting generated code here:
else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
{
basic_block _b1 = gimple_bb (_a1);
if (EDGE_COUNT (_b1->preds) == 2 && gimple_phi_num_args
(_a1) == 2)
{
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *>
(*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *>
(*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1;
_other_db_1 = _db_1 == _pb_0_1 : _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 &&
EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_SUCC (_other_db_1,
0)->dest == _b1)
new lines with indenting && EDGE_COUNT checks please.
I think EDGE_SUCC (_other_db_1, 0)->dest == _b1 is pointless. Instead
you want to check EDGE_PRED (_other_db_1, 0)->src == _db_1?
{
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1),
boolean_type_node, _cond_lhs_1, _cond_rhs_1);
bool _arg_0_is_true_1 = gimple_phi_arg_edge
(_a1, 0)->flags & EDGE_TRUE_VALUE;
Hmm, you don't know this PHI arg edge comes from the cond block, it could come
from the middle-bb.
Otherwise I think this will work, of course it depends on us handling
GENERIC conditions in COND_EXPRs. Not building the _p0 GENERIC tree
would be nice but that's quite difficult to pull off here (impossible even).
Overall I like it. Can you please try making the '^' flag work
instead (also document
it in doc/match-and-simplify.texi)?
Thanks for working on this.
Richard.
tree _p1 = gimple_phi_arg_def (_a1,
_arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1,
_arg_0_is_true_1 ? 1 : 0);
> + fprintf_indent (f, indent,
> + "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> + fprintf_indent (f, indent,
> + "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> +
> + char opname_0[20];
> + char opname_1[20];
> + char opname_2[20];
> + gen_opname (opname_0, 0);
> +
> + fprintf_indent (f, indent,
> + "tree %s = build2 (gimple_cond_code (_ct_%d), "
> + "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> + opname_0, depth, depth, depth);
> +
> + fprintf_indent (f, indent,
> + "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> + " & EDGE_TRUE_VALUE;\n", depth, depth);
> +
> + gen_opname (opname_1, 1);
> + fprintf_indent (f, indent,
> + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> + opname_1, depth, depth);
> +
> + gen_opname (opname_2, 2);
> + fprintf_indent (f, indent,
> + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> + opname_2, depth, depth);
> +
> + gen_kids (f, indent, gimple, depth);
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> +
> + indent -= 2;
> + fprintf_indent (f, indent, "}\n");
> + indent -= 2;
> +}
> +
> /* Emit a logging call to the debug file to the file F, with the INDENT from
> either the RESULT location or the S's match location if RESULT is null. */
> static void
> @@ -4263,6 +4376,8 @@ write_predicate (FILE *f, predicate_id *p, decision_tree &dt, bool gimple)
>
> if (!gimple)
> fprintf_indent (f, 2, "if (TREE_SIDE_EFFECTS (t)) return false;\n");
> +
> + dt.root->id = p->id;
> dt.root->gen_kids (f, 2, gimple, 0);
>
> fprintf_indent (f, 2, "return false;\n"
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 024e3350465..ddbdb7f7638 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3046,31 +3046,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
> /* Unsigned Saturation Add */
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -((X + Y) < X) */
> (match (usadd_left_part_1 @0 @1)
> (plus:c @0 @1)
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (usadd_left_part_2 @0 @1)
> (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -((type)(X + Y) < X) */
> (match (usadd_right_part_1 @0 @1)
> (negate (convert (lt (plus:c @0 @1) @0)))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> + SAT_ADD = (X + Y) | -(X > (X + Y)) */
> (match (usadd_right_part_1 @0 @1)
> (negate (convert (gt @0 (plus:c @0 @1))))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (usadd_right_part_2 @0 @1)
> (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> +(match (usadd_right_part_2 @0 @1)
> + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> because the sub part of left_part_2 cannot work with right_part_1.
> For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> @@ -3082,10 +3099,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (match (unsigned_integer_sat_add @0 @1)
> (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
>
> -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> (match (unsigned_integer_sat_add @0 @1)
> (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation add, case 3 (branch with ge):
> + SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> +
> +/* Unsigned saturation add, case 4 (branch with lt):
> + SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +
> +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> + (usadd_left_part_2 @0 @1) integer_minus_onep))
> +
> +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> + integer_minus_onep (usadd_left_part_2 @0 @1)))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..8624d414347 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> * =>
> * _12 = .SAT_ADD (_4, _6); */
> static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> {
> gcall *call = NULL;
>
> @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> }
> }
>
> +/*
> + * Try to match saturation arith pattern(s).
> + * From:
> + * <bb 2> :
> + * _1 = x_3(D) + y_4(D);
> + * if (_1 >= x_3(D))
> + * goto <bb 3>; [INV]
> + * else
> + * goto <bb 4>; [INV]
> + *
> + * <bb 3> :
> + *
> + * <bb 4> :
> + * # _2 = PHI <255(2), _1(3)>
> + *
> + * To:
> + * <bb 4> [local count: 1073741824]:
> + * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> +
> +static void
> +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> + if (gimple_phi_num_args (phi) != 2)
> + return;
> +
> + tree ops[2];
> + tree phi_result = gimple_phi_result (phi);
> +
> + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> + && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> + OPTIMIZE_FOR_BOTH))
> + {
> + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> + gimple_call_set_lhs (call, phi_result);
> + gsi_insert_before (gsi, call, GSI_SAME_STMT);
> +
> + gimple_stmt_iterator psi = gsi_for_stmt (phi);
> + remove_phi_node (&psi, false);
> + }
> +}
> +
> /* Recognize for unsigned x
> x = y - z;
> if (x > y)
> @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
> fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> + for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> + {
> + gimple_stmt_iterator gsi = gsi_last_bb (bb);
> + match_phi_saturation_arith (&gsi, psi.phi ());
> + }
> +
> for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> {
> gimple *stmt = gsi_stmt (gsi);
> @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> break;
>
> case BIT_IOR_EXPR:
> - match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> + match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
> /* fall-through */
> case BIT_XOR_EXPR:
> match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-05-29 14:01 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-27 6:29 [PATCH v3] Match: Support more form for scalar unsigned SAT_ADD pan2.li
2024-05-29 12:35 ` Richard Biener
2024-05-29 14:00 ` Li, Pan2
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).