[PATCH] vect: Missed opportunity to use [SU]ABD

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] vect: Missed opportunity to use [SU]ABD
@ 2023-05-09 16:07 Oluwatamilore Adebayo
  2023-05-10  9:01 ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-05-09 16:07 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford, richard.guenther

[-- Attachment #1: Type: text/plain, Size: 12981 bytes --]

From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
From: oluade01 <oluwatamilore.adebayo@arm.com>
Date: Fri, 14 Apr 2023 10:24:43 +0100
Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
 4 files changed, 234 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = abs (op0 - op1)
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type, bool reject_unsigned,
+				vect_unpromoted_value unprom[2],
+				tree diff_oprnds[2])
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g.  from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    if (!reject_unsigned)
+      return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  bool assigned_oprnds = false;
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
+  {
+    assigned_oprnds = true;
+    diff_oprnds[0] = gimple_assign_rhs1 (diff);
+    diff_oprnds[1] = gimple_assign_rhs2 (diff);
+  }
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
+			     WIDEN_MINUS_EXPR,
+			     false, 2, unprom, half_type))
+  {
+    if (diff_oprnds && !assigned_oprnds)
+    {
+      diff_oprnds[0] = unprom[0].op;
+      diff_oprnds[1] = unprom[1].op;
+    }
+  }
+  else if (!assigned_oprnds)
+  {
+    return false;
+  }
+  else
+  {
+    *half_type = NULL_TREE;
+  }
+
+  return true;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
-
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
-
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+  vect_unpromoted_value unprom[2];
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       true, unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+
+      UABD expressions are used when the input types are
+      narrower than the output types or the output type is narrower
+      than 32 bits
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+		stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  gassign *abs_stmt = last_stmt;
+  if (gimple_assign_cast_p (last_stmt))
+  {
+    tree last_rhs = gimple_assign_rhs1 (last_stmt);
+    if (!SSA_VAR_P (last_rhs))
+      return NULL;
+
+    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
+    if (!abs_stmt)
+      return NULL;
+  }
+
+  vect_unpromoted_value unprom[2];
+  tree diff_oprnds[2];
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       false, unprom, diff_oprnds))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree abd_oprnds[2];
+  if (half_type)
+  {
+    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
+      return NULL;
+
+    tree diff_type = TREE_TYPE (diff_oprnds[0]);
+    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
+    {
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
+			   get_vectype_for_scalar_type (vinfo, half_type));
+    }
+    else
+    {
+      abd_oprnds[0] = diff_oprnds[0];
+      abd_oprnds[1] = diff_oprnds[1];
+    }
+  }
+  else
+  {
+    if (unprom[0].op && unprom[1].op
+	&& (!SAME_TYPE (unprom[0].type, unprom[1].type)
+	|| !SAME_TYPE (unprom[0].type, out_type)))
+      return NULL;
+
+    unprom[0].op = diff_oprnds[0];
+    unprom[1].op = diff_oprnds[1];
+    tree signed_out = signed_type_for (out_type);
+    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			 signed_out, unprom, signed_out_vectype);
+
+    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
+      return NULL;
+  }
+
+  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
+      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
+
+  tree var = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, var);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+  return abd_stmt;
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6626,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Missed-opportunity-to-use-SU-ABD.patch --]
[-- Type: text/x-patch; name="0001-Missed-opportunity-to-use-SU-ABD.patch", Size: 12620 bytes --]

From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
From: oluade01 <oluwatamilore.adebayo@arm.com>
Date: Fri, 14 Apr 2023 10:24:43 +0100
Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
 4 files changed, 234 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = abs (op0 - op1)
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type, bool reject_unsigned,
+				vect_unpromoted_value unprom[2],
+				tree diff_oprnds[2])
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g.  from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    if (!reject_unsigned)
+      return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  bool assigned_oprnds = false;
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
+  {
+    assigned_oprnds = true;
+    diff_oprnds[0] = gimple_assign_rhs1 (diff);
+    diff_oprnds[1] = gimple_assign_rhs2 (diff);
+  }
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
+			     WIDEN_MINUS_EXPR,
+			     false, 2, unprom, half_type))
+  {
+    if (diff_oprnds && !assigned_oprnds)
+    {
+      diff_oprnds[0] = unprom[0].op;
+      diff_oprnds[1] = unprom[1].op;
+    }
+  }
+  else if (!assigned_oprnds)
+  {
+    return false;
+  }
+  else
+  {
+    *half_type = NULL_TREE;
+  }
+
+  return true;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
-
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
-
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+  vect_unpromoted_value unprom[2];
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       true, unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+
+      UABD expressions are used when the input types are
+      narrower than the output types or the output type is narrower
+      than 32 bits
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+		stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  gassign *abs_stmt = last_stmt;
+  if (gimple_assign_cast_p (last_stmt))
+  {
+    tree last_rhs = gimple_assign_rhs1 (last_stmt);
+    if (!SSA_VAR_P (last_rhs))
+      return NULL;
+
+    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
+    if (!abs_stmt)
+      return NULL;
+  }
+
+  vect_unpromoted_value unprom[2];
+  tree diff_oprnds[2];
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       false, unprom, diff_oprnds))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree abd_oprnds[2];
+  if (half_type)
+  {
+    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
+      return NULL;
+
+    tree diff_type = TREE_TYPE (diff_oprnds[0]);
+    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
+    {
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
+			   get_vectype_for_scalar_type (vinfo, half_type));
+    }
+    else
+    {
+      abd_oprnds[0] = diff_oprnds[0];
+      abd_oprnds[1] = diff_oprnds[1];
+    }
+  }
+  else
+  {
+    if (unprom[0].op && unprom[1].op
+	&& (!SAME_TYPE (unprom[0].type, unprom[1].type)
+	|| !SAME_TYPE (unprom[0].type, out_type)))
+      return NULL;
+
+    unprom[0].op = diff_oprnds[0];
+    unprom[1].op = diff_oprnds[1];
+    tree signed_out = signed_type_for (out_type);
+    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			 signed_out, unprom, signed_out_vectype);
+
+    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
+      return NULL;
+  }
+
+  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
+      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
+
+  tree var = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, var);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+  return abd_stmt;
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6626,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-09 16:07 [PATCH] vect: Missed opportunity to use [SU]ABD Oluwatamilore Adebayo
@ 2023-05-10  9:01 ` Richard Sandiford
  2023-05-10  9:49   ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Sandiford @ 2023-05-10  9:01 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> From: oluade01 <oluwatamilore.adebayo@arm.com>
> Date: Fri, 14 Apr 2023 10:24:43 +0100
> Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
>         * doc/md.texi (sabd, uabd): Document them.
>         * internal-fn.def (ABD): Use new optab.
>         * optabs.def (sabd_optab, uabd_optab): New optabs,
>         * tree-vect-patterns.cc (vect_recog_absolute_difference):
>         Recognize the following idiom abs (a - b).
>         (vect_recog_sad_pattern): Refactor to use
>         vect_recog_absolute_difference.
>         (vect_recog_abd_pattern): Use patterns found by
>         vect_recog_absolute_difference to build a new ABD
>         internal call.
> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 234 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = abs (op0 - op1)

op0 = abs (op1 - op2)

But that isn't the correct calculation for unsigned (where abs doesn't
really work).  It also doesn't handle some cases correctly for signed.

I think it's more:

  op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1

or (conceptually) max minus min.

E.g. for 16-bit values, the absolute difference between signed 0x7fff
and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
to signed).  But, ignoring undefined behaviour:

  0x7fff - 0x8000 = -1
  abs(-1) = 1

which gives the wrong answer.

We might still be able to fold C abs(a - b) to abd for signed a and b
by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
can't do it for -fwrapv.

Richi knows better than me what would be appropriate here.

Thanks,
Richard

> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +                             sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>                               savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>
> +/* Look for the following pattern
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +                               tree *half_type, bool reject_unsigned,
> +                               vect_unpromoted_value unprom[2],
> +                               tree diff_oprnds[2])
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +                                                   &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    if (!reject_unsigned)
> +      return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  bool assigned_oprnds = false;
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +  {
> +    assigned_oprnds = true;
> +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +  }
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +                            WIDEN_MINUS_EXPR,
> +                            false, 2, unprom, half_type))
> +  {
> +    if (diff_oprnds && !assigned_oprnds)
> +    {
> +      diff_oprnds[0] = unprom[0].op;
> +      diff_oprnds[1] = unprom[1].op;
> +    }
> +  }
> +  else if (!assigned_oprnds)
> +  {
> +    return false;
> +  }
> +  else
> +  {
> +    *half_type = NULL_TREE;
> +  }
> +
> +  return true;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> -
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> -
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -                                                   &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
>
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +  vect_unpromoted_value unprom[2];
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      true, unprom, NULL))
>      return NULL;
>
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> -                            false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;
>
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +       SABD_EXPR<x, y, out>
> +       UABD_EXPR<x, y, out>
> +
> +      UABD expressions are used when the input types are
> +      narrower than the output types or the output type is narrower
> +      than 32 bits
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +               stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> +       out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  gassign *abs_stmt = last_stmt;
> +  if (gimple_assign_cast_p (last_stmt))
> +  {
> +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +    if (!SSA_VAR_P (last_rhs))
> +      return NULL;
> +
> +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> +    if (!abs_stmt)
> +      return NULL;
> +  }
> +
> +  vect_unpromoted_value unprom[2];
> +  tree diff_oprnds[2];
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      false, unprom, diff_oprnds))
> +    return NULL;
> +
> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> +
> +  tree abd_oprnds[2];
> +  if (half_type)
> +  {
> +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> +      return NULL;
> +
> +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> +    {
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> +                          get_vectype_for_scalar_type (vinfo, half_type));
> +    }
> +    else
> +    {
> +      abd_oprnds[0] = diff_oprnds[0];
> +      abd_oprnds[1] = diff_oprnds[1];
> +    }
> +  }
> +  else
> +  {
> +    if (unprom[0].op && unprom[1].op
> +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> +       || !SAME_TYPE (unprom[0].type, out_type)))
> +      return NULL;
> +
> +    unprom[0].op = diff_oprnds[0];
> +    unprom[1].op = diff_oprnds[1];
> +    tree signed_out = signed_type_for (out_type);
> +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +                        signed_out, unprom, signed_out_vectype);
> +
> +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> +      return NULL;
> +  }
> +
> +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +                                         OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> +
> +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +                                               abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, var);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +  return abd_stmt;
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>
> @@ -6439,6 +6626,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */
> --
> 2.25.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10  9:01 ` Richard Sandiford
@ 2023-05-10  9:49   ` Richard Biener
  2023-05-10  9:51     ` Richard Biener
  2023-05-10 13:29     ` [PATCH] vect: " Oluwatamilore Adebayo
  0 siblings, 2 replies; 24+ messages in thread
From: Richard Biener @ 2023-05-10  9:49 UTC (permalink / raw)
  To: Oluwatamilore Adebayo, gcc-patches, richard.guenther, richard.sandiford

On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10  9:49   ` Richard Biener
@ 2023-05-10  9:51     ` Richard Biener
  2023-05-10 15:27       ` Richard Sandiford
  2023-05-10 13:29     ` [PATCH] vect: " Oluwatamilore Adebayo
  1 sibling, 1 reply; 24+ messages in thread
From: Richard Biener @ 2023-05-10  9:51 UTC (permalink / raw)
  To: Oluwatamilore Adebayo, gcc-patches, richard.guenther, richard.sandiford

On Wed, May 10, 2023 at 11:49 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> > >
> > > This adds a recognition pattern for the non-widening
> > > absolute difference (ABD).
> > >
> > > gcc/ChangeLog:
> > >
> > >         * doc/md.texi (sabd, uabd): Document them.
> > >         * internal-fn.def (ABD): Use new optab.
> > >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> > >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> > >         Recognize the following idiom abs (a - b).
> > >         (vect_recog_sad_pattern): Refactor to use
> > >         vect_recog_absolute_difference.
> > >         (vect_recog_abd_pattern): Use patterns found by
> > >         vect_recog_absolute_difference to build a new ABD
> > >         internal call.
> > > ---
> > >  gcc/doc/md.texi           |  10 ++
> > >  gcc/internal-fn.def       |   3 +
> > >  gcc/optabs.def            |   2 +
> > >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> > >  4 files changed, 234 insertions(+), 31 deletions(-)
> > >
> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > > --- a/gcc/doc/md.texi
> > > +++ b/gcc/doc/md.texi
> > > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> > >  Vector shift and rotate instructions that take vectors as operand 2
> > >  instead of a scalar type.
> > >
> > > +@cindex @code{uabd@var{m}} instruction pattern
> > > +@cindex @code{sabd@var{m}} instruction pattern
> > > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > > +Signed and unsigned absolute difference instructions.  These
> > > +instructions find the difference between operands 1 and 2
> > > +then return the absolute value.  A C code equivalent would be:
> > > +@smallexample
> > > +op0 = abs (op0 - op1)
> >
> > op0 = abs (op1 - op2)
> >
> > But that isn't the correct calculation for unsigned (where abs doesn't
> > really work).  It also doesn't handle some cases correctly for signed.
> >
> > I think it's more:
> >
> >   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
> >
> > or (conceptually) max minus min.
> >
> > E.g. for 16-bit values, the absolute difference between signed 0x7fff
> > and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> > to signed).  But, ignoring undefined behaviour:
> >
> >   0x7fff - 0x8000 = -1
> >   abs(-1) = 1
> >
> > which gives the wrong answer.
> >
> > We might still be able to fold C abs(a - b) to abd for signed a and b
> > by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> > can't do it for -fwrapv.
> >
> > Richi knows better than me what would be appropriate here.
>
> The question is what does the hardware do?  For the widening [us]sad it's
> obvious since the difference is computed in a wider signed mode and the
> absolute value always fits.
>
> So what does it actually do, esp. when the difference yields 0x8000?

A "sensible" definition would be that it works like the widening [us]sad
and applies truncation to the result (modulo-reducing when the result
isn't always unsigned).

Richard.

> Richard.
>
> >
> > Thanks,
> > Richard
> >
> > > +@end smallexample
> > > +
> > >  @cindex @code{avg@var{m}3_floor} instruction pattern
> > >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> > >  @item @samp{avg@var{m}3_floor}
> > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > > --- a/gcc/internal-fn.def
> > > +++ b/gcc/internal-fn.def
> > > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> > >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> > >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> > >
> > > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > > +                             sabd, uabd, binary)
> > > +
> > >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> > >                               savg_floor, uavg_floor, binary)
> > >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > > --- a/gcc/optabs.def
> > > +++ b/gcc/optabs.def
> > > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> > >  OPTAB_D (extract_last_optab, "extract_last_$a")
> > >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> > >
> > > +OPTAB_D (uabd_optab, "uabd$a3")
> > > +OPTAB_D (sabd_optab, "sabd$a3")
> > >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> > >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> > >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > > --- a/gcc/tree-vect-patterns.cc
> > > +++ b/gcc/tree-vect-patterns.cc
> > > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> > >      }
> > >  }
> > >
> > > +/* Look for the following pattern
> > > +       X = x[i]
> > > +       Y = y[i]
> > > +       DIFF = X - Y
> > > +       DAD = ABS_EXPR<DIFF>
> > > + */
> > > +static bool
> > > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > > +                               tree *half_type, bool reject_unsigned,
> > > +                               vect_unpromoted_value unprom[2],
> > > +                               tree diff_oprnds[2])
> > > +{
> > > +  if (!abs_stmt)
> > > +    return false;
> > > +
> > > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > +     inside the loop (in case we are analyzing an outer-loop).  */
> > > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > > +    return false;
> > > +
> > > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > > +  if (!abs_oprnd)
> > > +    return false;
> > > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > > +    return false;
> > > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > > +    return false;
> > > +
> > > +  /* Peel off conversions from the ABS input.  This can involve sign
> > > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > > +     or signed promotion, but it can't include unsigned promotion.
> > > +     (Note that ABS of an unsigned promotion should have been folded
> > > +     away before now anyway.)  */
> > > +  vect_unpromoted_value unprom_diff;
> > > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > > +                                                   &unprom_diff);
> > > +  if (!abs_oprnd)
> > > +    return false;
> > > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > > +      && TYPE_UNSIGNED (unprom_diff.type))
> > > +    if (!reject_unsigned)
> > > +      return false;
> > > +
> > > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > > +  if (!diff_stmt_vinfo)
> > > +    return false;
> > > +
> > > +  bool assigned_oprnds = false;
> > > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > > +  {
> > > +    assigned_oprnds = true;
> > > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > > +  }
> > > +
> > > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > +     inside the loop (in case we are analyzing an outer-loop).  */
> > > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > > +                            WIDEN_MINUS_EXPR,
> > > +                            false, 2, unprom, half_type))
> > > +  {
> > > +    if (diff_oprnds && !assigned_oprnds)
> > > +    {
> > > +      diff_oprnds[0] = unprom[0].op;
> > > +      diff_oprnds[1] = unprom[1].op;
> > > +    }
> > > +  }
> > > +  else if (!assigned_oprnds)
> > > +  {
> > > +    return false;
> > > +  }
> > > +  else
> > > +  {
> > > +    *half_type = NULL_TREE;
> > > +  }
> > > +
> > > +  return true;
> > > +}
> > > +
> > >  /* Convert UNPROM to TYPE and return the result, adding new statements
> > >     to STMT_INFO's pattern definition statements if no better way is
> > >     available.  VECTYPE is the vector form of TYPE.
> > > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> > >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > >       inside the loop (in case we are analyzing an outer-loop).  */
> > >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > > -  if (!abs_stmt
> > > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > > -    return NULL;
> > > -
> > > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > > -  if (TYPE_UNSIGNED (abs_type))
> > > -    return NULL;
> > > -
> > > -  /* Peel off conversions from the ABS input.  This can involve sign
> > > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > > -     or signed promotion, but it can't include unsigned promotion.
> > > -     (Note that ABS of an unsigned promotion should have been folded
> > > -     away before now anyway.)  */
> > > -  vect_unpromoted_value unprom_diff;
> > > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > > -                                                   &unprom_diff);
> > > -  if (!abs_oprnd)
> > > -    return NULL;
> > > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > > -      && TYPE_UNSIGNED (unprom_diff.type))
> > > -    return NULL;
> > >
> > > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > > -  if (!diff_stmt_vinfo)
> > > +  vect_unpromoted_value unprom[2];
> > > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > > +                                      true, unprom, NULL))
> > >      return NULL;
> > >
> > > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > -     inside the loop (in case we are analyzing an outer-loop).  */
> > > -  vect_unpromoted_value unprom[2];
> > > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > > -                            false, 2, unprom, &half_type))
> > > +  if (!half_type)
> > >      return NULL;
> > >
> > >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> > >    return pattern_stmt;
> > >  }
> > >
> > > +/* Function vect_recog_abd_pattern
> > > +
> > > +   Try to find the following ABsolute Difference (ABD) pattern:
> > > +
> > > +     VTYPE x, y, out;
> > > +     type diff;
> > > +   loop i in range:
> > > +     S1 diff = x[i] - y[i]
> > > +     S2 out[i] = ABS_EXPR <diff>;
> > > +
> > > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > > +   the same size as 'type'
> > > +
> > > +   Input:
> > > +
> > > +   * STMT_VINFO: The stmt from which the pattern search begins
> > > +
> > > +   Output:
> > > +
> > > +   * TYPE_out: The type of the output of this pattern
> > > +
> > > +   * Return value: A new stmt that will be used to replace the sequence of
> > > +     stmts that constitute the pattern; either SABD or UABD:
> > > +       SABD_EXPR<x, y, out>
> > > +       UABD_EXPR<x, y, out>
> > > +
> > > +      UABD expressions are used when the input types are
> > > +      narrower than the output types or the output type is narrower
> > > +      than 32 bits
> > > + */
> > > +
> > > +static gimple *
> > > +vect_recog_abd_pattern (vec_info *vinfo,
> > > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > > +{
> > > +  /* Look for the following patterns
> > > +       X = x[i]
> > > +       Y = y[i]
> > > +       DIFF = X - Y
> > > +       DAD = ABS_EXPR<DIFF>
> > > +       out[i] = DAD
> > > +
> > > +     In which
> > > +      - X, Y, DIFF, DAD all have the same type
> > > +      - x, y, out are all vectors of the same type
> > > +  */
> > > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > > +  if (!last_stmt)
> > > +    return NULL;
> > > +
> > > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > > +
> > > +  gassign *abs_stmt = last_stmt;
> > > +  if (gimple_assign_cast_p (last_stmt))
> > > +  {
> > > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > > +    if (!SSA_VAR_P (last_rhs))
> > > +      return NULL;
> > > +
> > > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > > +    if (!abs_stmt)
> > > +      return NULL;
> > > +  }
> > > +
> > > +  vect_unpromoted_value unprom[2];
> > > +  tree diff_oprnds[2];
> > > +  tree half_type;
> > > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > > +                                      false, unprom, diff_oprnds))
> > > +    return NULL;
> > > +
> > > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > > +
> > > +  tree abd_oprnds[2];
> > > +  if (half_type)
> > > +  {
> > > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > > +      return NULL;
> > > +
> > > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > > +    {
> > > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > > +    }
> > > +    else
> > > +    {
> > > +      abd_oprnds[0] = diff_oprnds[0];
> > > +      abd_oprnds[1] = diff_oprnds[1];
> > > +    }
> > > +  }
> > > +  else
> > > +  {
> > > +    if (unprom[0].op && unprom[1].op
> > > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > > +      return NULL;
> > > +
> > > +    unprom[0].op = diff_oprnds[0];
> > > +    unprom[1].op = diff_oprnds[1];
> > > +    tree signed_out = signed_type_for (out_type);
> > > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > > +                        signed_out, unprom, signed_out_vectype);
> > > +
> > > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > > +      return NULL;
> > > +  }
> > > +
> > > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > > +    return NULL;
> > > +
> > > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > > +
> > > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > > +  if (!vectype
> > > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > > +                                         OPTIMIZE_FOR_SPEED))
> > > +    return NULL;
> > > +
> > > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > > +
> > > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > > +                                               abd_oprnds[0], abd_oprnds[1]);
> > > +  gimple_call_set_lhs (abd_stmt, var);
> > > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > > +  return abd_stmt;
> > > +}
> > > +
> > >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> > >     so that it can be treated as though it had the form:
> > >
> > > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> > >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> > >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> > >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > > +  { vect_recog_abd_pattern, "abd" },
> > >    { vect_recog_over_widening_pattern, "over_widening" },
> > >    /* Must come after over_widening, which narrows the shift as much as
> > >       possible beforehand.  */
> > > --
> > > 2.25.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10  9:49   ` Richard Biener
  2023-05-10  9:51     ` Richard Biener
@ 2023-05-10 13:29     ` Oluwatamilore Adebayo
  2023-05-15 12:35       ` Oluwatamilore Adebayo
  1 sibling, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-05-10 13:29 UTC (permalink / raw)
  To: Richard Biener, gcc-patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 16843 bytes --]

When using inputs of 0x7fff and 0x8000 the result yielded is -1.
When using inputs of -1 and 0x7fff the results yielded is 0x8000.

Tami
________________________________
From: Richard Biener <richard.guenther@gmail.com>
Sent: Wednesday, May 10, 2023 10:49 AM
To: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; richard.guenther@gmail.com <richard.guenther@gmail.com>; Richard Sandiford <Richard.Sandiford@arm.com>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10  9:51     ` Richard Biener
@ 2023-05-10 15:27       ` Richard Sandiford
  2023-05-17 12:21         ` oluwatamilore.adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Sandiford @ 2023-05-10 15:27 UTC (permalink / raw)
  To: Richard Biener; +Cc: Oluwatamilore Adebayo, gcc-patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Wed, May 10, 2023 at 11:49 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
>>
>> On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>> >
>> > Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
>> > > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
>> > > From: oluade01 <oluwatamilore.adebayo@arm.com>
>> > > Date: Fri, 14 Apr 2023 10:24:43 +0100
>> > > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
>> > >
>> > > This adds a recognition pattern for the non-widening
>> > > absolute difference (ABD).
>> > >
>> > > gcc/ChangeLog:
>> > >
>> > >         * doc/md.texi (sabd, uabd): Document them.
>> > >         * internal-fn.def (ABD): Use new optab.
>> > >         * optabs.def (sabd_optab, uabd_optab): New optabs,
>> > >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
>> > >         Recognize the following idiom abs (a - b).
>> > >         (vect_recog_sad_pattern): Refactor to use
>> > >         vect_recog_absolute_difference.
>> > >         (vect_recog_abd_pattern): Use patterns found by
>> > >         vect_recog_absolute_difference to build a new ABD
>> > >         internal call.
>> > > ---
>> > >  gcc/doc/md.texi           |  10 ++
>> > >  gcc/internal-fn.def       |   3 +
>> > >  gcc/optabs.def            |   2 +
>> > >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
>> > >  4 files changed, 234 insertions(+), 31 deletions(-)
>> > >
>> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> > > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
>> > > --- a/gcc/doc/md.texi
>> > > +++ b/gcc/doc/md.texi
>> > > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>> > >  Vector shift and rotate instructions that take vectors as operand 2
>> > >  instead of a scalar type.
>> > >
>> > > +@cindex @code{uabd@var{m}} instruction pattern
>> > > +@cindex @code{sabd@var{m}} instruction pattern
>> > > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
>> > > +Signed and unsigned absolute difference instructions.  These
>> > > +instructions find the difference between operands 1 and 2
>> > > +then return the absolute value.  A C code equivalent would be:
>> > > +@smallexample
>> > > +op0 = abs (op0 - op1)
>> >
>> > op0 = abs (op1 - op2)
>> >
>> > But that isn't the correct calculation for unsigned (where abs doesn't
>> > really work).  It also doesn't handle some cases correctly for signed.
>> >
>> > I think it's more:
>> >
>> >   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>> >
>> > or (conceptually) max minus min.
>> >
>> > E.g. for 16-bit values, the absolute difference between signed 0x7fff
>> > and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
>> > to signed).  But, ignoring undefined behaviour:
>> >
>> >   0x7fff - 0x8000 = -1
>> >   abs(-1) = 1
>> >
>> > which gives the wrong answer.
>> >
>> > We might still be able to fold C abs(a - b) to abd for signed a and b
>> > by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
>> > can't do it for -fwrapv.
>> >
>> > Richi knows better than me what would be appropriate here.
>>
>> The question is what does the hardware do?  For the widening [us]sad it's
>> obvious since the difference is computed in a wider signed mode and the
>> absolute value always fits.
>>
>> So what does it actually do, esp. when the difference yields 0x8000?
>
> A "sensible" definition would be that it works like the widening [us]sad
> and applies truncation to the result (modulo-reducing when the result
> isn't always unsigned).

Yeah.  Like Tami says, this is what the instruction does.

I think all three definitions are equivalent: the extend/operate/truncate
one, the ?: one above, and the "max - min" one.  Probably just personal
preference as to which seems more natural.

Reading the patch again, it does check TYPE_OVERFLOW_WRAPS, so -fwrapv
might be handled correctly after all.  Sorry for missing it first time.

On the patch:

> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type, bool reject_unsigned,
> +				vect_unpromoted_value unprom[2],
> +				tree diff_oprnds[2])

It would be good to document what the parameters mean (except VINFO,
which is obvious).

> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    if (!reject_unsigned)
> +      return false;

I think this should instead be:

  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type)
      && TYPE_UNSIGNED (abs_type))
    return false;

The point is that if any promotion (from "A" to "B" say) happens on the
input to the ABS, it has to be signed rather than unsigned promotion,
so that the extra bits in B get filled with the sign of A.  Without that,
the ABS will always be a no-op (which is why it should have been removed
by now).

> +  bool assigned_oprnds = false;
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +  {
> +    assigned_oprnds = true;
> +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +  }
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +			     WIDEN_MINUS_EXPR,
> +			     false, 2, unprom, half_type))
> +  {
> +    if (diff_oprnds && !assigned_oprnds)
> +    {
> +      diff_oprnds[0] = unprom[0].op;
> +      diff_oprnds[1] = unprom[1].op;
> +    }
> +  }
> +  else if (!assigned_oprnds)
> +  {
> +    return false;
> +  }
> +  else
> +  {
> +    *half_type = NULL_TREE;
> +  }
> +
> +  return true;
> +}

I think the code would be easier to follow if it used vect_widened_op_tree
first, and only considered the unextended case on failure.

Minor formatting nit: GCC style is to indent braces by two spaces
further than an "if":

  if (...)
    {
      ...
    }

Thanks,
Richard

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10 13:29     ` [PATCH] vect: " Oluwatamilore Adebayo
@ 2023-05-15 12:35       ` Oluwatamilore Adebayo
  0 siblings, 0 replies; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-05-15 12:35 UTC (permalink / raw)
  To: Richard Biener, gcc-patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 17481 bytes --]



From: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com>
Sent: Wednesday, May 10, 2023 14:29
To: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; Richard Sandiford <Richard.Sandiford@arm.com>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

When using inputs of 0x7fff and 0x8000 the result yielded is -1.
When using inputs of -1 and 0x7fff the results yielded is 0x8000.

Tami
________________________________
From: Richard Biener <richard.guenther@gmail.com<mailto:richard.guenther@gmail.com>>
Sent: Wednesday, May 10, 2023 10:49 AM
To: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com<mailto:Oluwatamilore.Adebayo@arm.com>>; gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org> <gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>; richard.guenther@gmail.com<mailto:richard.guenther@gmail.com> <richard.guenther@gmail.com<mailto:richard.guenther@gmail.com>>; Richard Sandiford <Richard.Sandiford@arm.com<mailto:Richard.Sandiford@arm.com>>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com<mailto:richard.sandiford@arm.com>> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com<mailto:Oluwatamilore.Adebayo@arm.com>> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com<mailto:oluwatamilore.adebayo@arm.com>>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-10 15:27       ` Richard Sandiford
@ 2023-05-17 12:21         ` oluwatamilore.adebayo
  2023-05-18  8:39           ` [PATCH 1/4] " Oluwatamilore.Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: oluwatamilore.adebayo @ 2023-05-17 12:21 UTC (permalink / raw)
  To: richard.sandiford; +Cc: Oluwatamilore.Adebayo, gcc-patches, richard.guenther

> Yeah.  Like Tami says, this is what the instruction does.
> 
> I think all three definitions are equivalent: the extend/operate/truncate
> one, the ?: one above, and the "max - min" one.  Probably just personal
> preference as to which seems more natural.

Decided to switch to using the ?: one as it makes more sense
for unsigned types.

> It would be good to document what the parameters mean (except VINFO,
> which is obvious).

Documentation added for vect_recog_absolute_difference.

> I think this should instead be:
> 
>   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
>       && TYPE_UNSIGNED (unprom_diff.type)
>       && TYPE_UNSIGNED (abs_type))
>     return false;
> 
> ....
> 
> I think the code would be easier to follow if it used vect_widened_op_tree
> first, and only considered the unextended case on failure.

Implemented Richard's suggested changes to vect_recog_absolute_difference.

> Minor formatting nit: GCC style is to indent braces by two spaces
> further than an "if":
> 
>   if (...)
>     {
>       ...
>     }

Adopted this style.

New patch will be in the reply.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/4] Missed opportunity to use [SU]ABD
  2023-05-17 12:21         ` oluwatamilore.adebayo
@ 2023-05-18  8:39           ` Oluwatamilore.Adebayo
  2023-05-18 17:59             ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore.Adebayo @ 2023-05-18  8:39 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 255 +++++++++++++++++++++++++++++++++-----
 4 files changed, 239 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..3e65584d7efcd301f2c96a40edd82d30b84462b8 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op0 > op1 ? op0 - op1 : op1 - op0;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..50f1822f220c023027f4b0f777965f3757842fa2 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is unsigned.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
+   ABS_STMT, otherwise it will be set the operations found by
+   vect_widened_op_tree.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type, bool reject_unsigned,
+				vect_unpromoted_value unprom[2],
+				tree diff_oprnds[2])
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g.  from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type)
+      && TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
+			     WIDEN_MINUS_EXPR,
+			     false, 2, unprom, half_type))
+    {
+      if (diff_oprnds)
+	{
+	  diff_oprnds[0] = unprom[0].op;
+	  diff_oprnds[1] = unprom[1].op;
+	}
+      return true;
+    }
+
+  // Failed to find a widen operation so we check for a regular MINUS_EXPR
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
+    {
+      diff_oprnds[0] = gimple_assign_rhs1 (diff);
+      diff_oprnds[1] = gimple_assign_rhs2 (diff);
+    }
+  else
+    return false;
+
+  *half_type = NULL_TREE;
+
+  return true;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1395,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
-
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
-
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+  vect_unpromoted_value unprom[2];
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       true, unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1423,138 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+
+      UABD expressions are used when the input types are
+      narrower than the output types or the output type is narrower
+      than 32 bits
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+		stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  gassign *abs_stmt = last_stmt;
+  if (gimple_assign_cast_p (last_stmt))
+    {
+      tree last_rhs = gimple_assign_rhs1 (last_stmt);
+      if (!SSA_VAR_P (last_rhs))
+	return NULL;
+
+      abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
+      if (!abs_stmt)
+	return NULL;
+    }
+
+  vect_unpromoted_value unprom[2];
+  tree diff_oprnds[2];
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       false, unprom, diff_oprnds))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree abd_oprnds[2];
+  if (half_type)
+    {
+      if (!SAME_TYPE (unprom[0].type, unprom[1].type))
+	return NULL;
+
+      tree diff_type = TREE_TYPE (diff_oprnds[0]);
+      if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
+	{
+	  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
+	  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			       half_type, unprom, vectype);
+	}
+      else
+	{
+	  abd_oprnds[0] = diff_oprnds[0];
+	  abd_oprnds[1] = diff_oprnds[1];
+	}
+    }
+  else
+    {
+      if (unprom[0].op && unprom[1].op
+	  && (!SAME_TYPE (unprom[0].type, unprom[1].type)
+	  || !SAME_TYPE (unprom[0].type, out_type)))
+	return NULL;
+
+      unprom[0].op = diff_oprnds[0];
+      unprom[1].op = diff_oprnds[1];
+      tree signed_out = signed_type_for (out_type);
+      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, signed_out_vectype);
+
+      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
+	return NULL;
+    }
+
+  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
+      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
+
+  tree var = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, var);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+  return abd_stmt;
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6631,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/4] Missed opportunity to use [SU]ABD
  2023-05-18  8:39           ` [PATCH 1/4] " Oluwatamilore.Adebayo
@ 2023-05-18 17:59             ` Richard Sandiford
  2023-05-22 13:32               ` Richard Biener
  2023-05-23 14:27               ` [PATCH] vect: " Oluwatamilore Adebayo
  0 siblings, 2 replies; 24+ messages in thread
From: Richard Sandiford @ 2023-05-18 17:59 UTC (permalink / raw)
  To: Oluwatamilore.Adebayo; +Cc: gcc-patches, richard.guenther

Thanks for the update.  Some of these comments would have applied
to the first version, so sorry for not catching them first time.

<Oluwatamilore.Adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
> 	* doc/md.texi (sabd, uabd): Document them.
> 	* internal-fn.def (ABD): Use new optab.
> 	* optabs.def (sabd_optab, uabd_optab): New optabs,
> 	* tree-vect-patterns.cc (vect_recog_absolute_difference):
> 	Recognize the following idiom abs (a - b).
> 	(vect_recog_sad_pattern): Refactor to use
> 	vect_recog_absolute_difference.
> 	(vect_recog_abd_pattern): Use patterns found by
> 	vect_recog_absolute_difference to build a new ABD
> 	internal call.
> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 255 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 239 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..3e65584d7efcd301f2c96a40edd82d30b84462b8 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>  
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = op0 > op1 ? op0 - op1 : op1 - op0;

Should be:

  op0 = op1 > op2 ? op1 - op2 : op2 - op1;

since op0 is the output.

> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>  
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +			      sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>  			      savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>  
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b09539776c0056e77f99b10365d0a8747fbc5..50f1822f220c023027f4b0f777965f3757842fa2 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>  
> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +
> +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> +   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is unsigned.
> +   HALF_TYPE and UNPROM will be set should the statement be found to
> +   be a widened operation.
> +   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
> +   ABS_STMT, otherwise it will be set the operations found by
> +   vect_widened_op_tree.
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type, bool reject_unsigned,
> +				vect_unpromoted_value unprom[2],
> +				tree diff_oprnds[2])
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> +    return false;

Could you explain the reject_unsigned behaviour?  I'd have expected
TYPE_OVERFLOW_WRAPS (abs_type) to reject the unsigned case anyway.

> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */

I'm not sure we can do the "unsigned subtraction to a signed ABS input"
case.  The code:

int
f (unsigned int x, int y)
{
  unsigned int diff = x - y;
  return __builtin_abs (diff);
}

is well-defined C (no undefined behaviour).  But it doesn't do the
same thing as an absolute difference.

So...

> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type)
> +      && TYPE_UNSIGNED (abs_type))
> +    return false;

...I think there are four valid cases:

(1) !TYPE_UNSIGNED (abs_type)
    && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
    && TYPE_PRECISION (unprom_diff.type) == TYPE_PRECISION (abs_type)
    && regular unwidened MINUS_EXPR

    Here the subtraction and ABS input are signed.  We can assume that
    there is no signed overflow in the subtraction.

(2) !TYPE_UNSIGNED (abs_type)
    && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
    && TYPE_PRECISION (unprom_diff.type) < TYPE_PRECISION (abs_type)
    && !TYPE_UNSIGNED (unprom_diff.type)
    && regular unwidened MINUS_EXPR

    This is like (1) except that the MINUS_EXPR result is sign-extended
    before the ABS.  Again we rely on having no signed overflow, hence the
    TYPE_OVERFLOW_UNDEFINED check on the subtraction type.  We can then
    push the sign-extension of the subtraction result “down” to the inputs
    of the subtraction.

(3) !TYPE_UNSIGNED (abs_type)
    && TYPE_PRECISION (unprom_diff.type) == TYPE_PRECISION (abs_type)
    && MINUS_EXPR of widened inputs

    In other words;

    - the ABS input is signed
    - the MINUS_EXPR has the same precision as the ABS
    - the MINUS_EXPR inputs are narrower than the result

    In this case the MINUS_EXPR is known to give correct results for
    all inputs, due to the extra range.  There's no need to rely on
    TYPE_OVERFLOW_UNDEFINED.

(4) !TYPE_UNSIGNED (abs_type)
    && TYPE_PRECISION (unprom_diff.type) < TYPE_PRECISION (abs_type)
    && !TYPE_UNSIGNED (unprom_diff.type)
    && MINUS_EXPR of widened inputs

    Like (3), but the subtraction result is further sign-extended
    (somewhat pointlessly).

Which is a very convoluated way of classifying it, sorry.

I think this means that:

(a) We should reject all cases in TYPE_UNSIGNED (abs_type) is true.
    Absolute values with unsigned inputs are a niche game, if they're
    valid at all.

(b) We should require TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
    whenever we see a MINUS_EXPR with unextended inputs (i.e. whenever
    vect_widened_op_tree fails).

I guess some of this is really a comment about the existing SAD_EXPR code.

> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +			     WIDEN_MINUS_EXPR,
> +			     false, 2, unprom, half_type))
> +    {
> +      if (diff_oprnds)
> +	{
> +	  diff_oprnds[0] = unprom[0].op;
> +	  diff_oprnds[1] = unprom[1].op;
> +	}
> +      return true;
> +    }
> +
> +  // Failed to find a widen operation so we check for a regular MINUS_EXPR
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +    {
> +      diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +      diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +    }
> +  else
> +    return false;
> +
> +  *half_type = NULL_TREE;
> +
> +  return true;

Very minor, but I think it'd be clearer to put the *half_type assignment
and return true in the "if" body, and unconditionally return false
at the end.  That makes it more consistent with the widening case.

> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1308,40 +1395,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
>  
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> -
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -						    &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
> -
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +  vect_unpromoted_value unprom[2];
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +				       true, unprom, NULL))
>      return NULL;
>  
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> -			     false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;
>  
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1363,6 +1423,138 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>  
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +	SABD_EXPR<x, y, out>
> +	UABD_EXPR<x, y, out>
> +
> +      UABD expressions are used when the input types are
> +      narrower than the output types or the output type is narrower
> +      than 32 bits
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +		stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +	out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  gassign *abs_stmt = last_stmt;
> +  if (gimple_assign_cast_p (last_stmt))
> +    {
> +      tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +      if (!SSA_VAR_P (last_rhs))
> +	return NULL;
> +
> +      abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> +      if (!abs_stmt)
> +	return NULL;
> +    }

Could you explain why this is needed?  I'd expect the function to be
called on the cast rhs too, so I wouldn't expect that we'd need to handle
casts explicitly.

> +  vect_unpromoted_value unprom[2];
> +  tree diff_oprnds[2];
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +				       false, unprom, diff_oprnds))
> +    return NULL;
> +
> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> +
> +  tree abd_oprnds[2];
> +  if (half_type)
> +    {
> +      if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> +	return NULL;

I wouldn't have expected this to be unecessary.  half_type is supposed
to be a common type that can hold all values of unprom[0].type and
unprom[1].type.  We should just be able to do:

> +      tree diff_type = TREE_TYPE (diff_oprnds[0]);
> +      if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> +	{
> +	  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
> +	  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			       half_type, unprom, vectype);

...this vect_convert_inputs unconditionally.  We need to check that
the get_vectype_for_scalar_type call succeeds though.

So does it work as:

  if (half_type)
    {
      tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
      if (!vectype)
        return false;
      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
                           half_type, unprom, vectype);
    }

?

> +	}
> +      else
> +	{
> +	  abd_oprnds[0] = diff_oprnds[0];
> +	  abd_oprnds[1] = diff_oprnds[1];
> +	}
> +    }
> +  else
> +    {
> +      if (unprom[0].op && unprom[1].op
> +	  && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> +	  || !SAME_TYPE (unprom[0].type, out_type)))
> +	return NULL;

AIUI, the !half_type case shouldn't look at unprom, since it's handling
simple MINUS_EXPRs.  I think we can just delete this "if" statement.

> +
> +      unprom[0].op = diff_oprnds[0];
> +      unprom[1].op = diff_oprnds[1];
> +      tree signed_out = signed_type_for (out_type);
> +      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);

We need to check for success here too.

> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			   signed_out, unprom, signed_out_vectype);
> +
> +      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> +	return NULL;

I don't think this is needed.

> +    }
> +
> +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> +    return NULL;

I also don't think this is needed.  AIUI, the previous code has done
all the necessary correctness checks.

> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);

I think instead we want the vector types computed above.  That is:

- The ABD should be done on the vector version of half_type
  if the subtraction was on promoted inputs.  The result of
  the ABD should then be zero-extended (using vect_convert_output)
  to out_type.

  In particular, it's the sign of HALF_TYPE that decides whether
  it's signed or unsigned ABD.

- The ABD should be done on the vector version of signed_outtype
  if the subtraction was on unpromoted inputs.  We then might need
  to sign-cast it to outtype, if outtype is unsigned.  We can
  use vect_convert_output for that too.

  In other words, this case must use signed ABD.

Hope I've got that right...

Thanks,
Richard

> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +					  OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> +
> +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +						abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, var);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +  return abd_stmt;
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>  
> @@ -6439,6 +6631,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/4] Missed opportunity to use [SU]ABD
  2023-05-18 17:59             ` Richard Sandiford
@ 2023-05-22 13:32               ` Richard Biener
  2023-05-23 14:27               ` [PATCH] vect: " Oluwatamilore Adebayo
  1 sibling, 0 replies; 24+ messages in thread
From: Richard Biener @ 2023-05-22 13:32 UTC (permalink / raw)
  To: Oluwatamilore.Adebayo, gcc-patches, richard.guenther, richard.sandiford

On Thu, May 18, 2023 at 7:59 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Thanks for the update.  Some of these comments would have applied
> to the first version, so sorry for not catching them first time.
>
> <Oluwatamilore.Adebayo@arm.com> writes:
> > From: oluade01 <oluwatamilore.adebayo@arm.com>
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >       * doc/md.texi (sabd, uabd): Document them.
> >       * internal-fn.def (ABD): Use new optab.
> >       * optabs.def (sabd_optab, uabd_optab): New optabs,
> >       * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >       Recognize the following idiom abs (a - b).
> >       (vect_recog_sad_pattern): Refactor to use
> >       vect_recog_absolute_difference.
> >       (vect_recog_abd_pattern): Use patterns found by
> >       vect_recog_absolute_difference to build a new ABD
> >       internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 255 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 239 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..3e65584d7efcd301f2c96a40edd82d30b84462b8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = op0 > op1 ? op0 - op1 : op1 - op0;
>
> Should be:
>
>   op0 = op1 > op2 ? op1 - op2 : op2 - op1;
>
> since op0 is the output.
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                           sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                             savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..50f1822f220c023027f4b0f777965f3757842fa2 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +     X = x[i]
> > +     Y = y[i]
> > +     DIFF = X - Y
> > +     DAD = ABS_EXPR<DIFF>
> > +
> > +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> > +   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is unsigned.
> > +   HALF_TYPE and UNPROM will be set should the statement be found to
> > +   be a widened operation.
> > +   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
> > +   ABS_STMT, otherwise it will be set the operations found by
> > +   vect_widened_op_tree.
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                             tree *half_type, bool reject_unsigned,
> > +                             vect_unpromoted_value unprom[2],
> > +                             tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
>
> Could you explain the reject_unsigned behaviour?  I'd have expected
> TYPE_OVERFLOW_WRAPS (abs_type) to reject the unsigned case anyway.
>
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
>
> I'm not sure we can do the "unsigned subtraction to a signed ABS input"
> case.  The code:
>
> int
> f (unsigned int x, int y)
> {
>   unsigned int diff = x - y;
>   return __builtin_abs (diff);
> }
>
> is well-defined C (no undefined behaviour).  But it doesn't do the
> same thing as an absolute difference.
>
> So...
>
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                 &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type)
> > +      && TYPE_UNSIGNED (abs_type))
> > +    return false;
>
> ...I think there are four valid cases:
>
> (1) !TYPE_UNSIGNED (abs_type)
>     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
>     && TYPE_PRECISION (unprom_diff.type) == TYPE_PRECISION (abs_type)
>     && regular unwidened MINUS_EXPR
>
>     Here the subtraction and ABS input are signed.  We can assume that
>     there is no signed overflow in the subtraction.
>
> (2) !TYPE_UNSIGNED (abs_type)
>     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
>     && TYPE_PRECISION (unprom_diff.type) < TYPE_PRECISION (abs_type)
>     && !TYPE_UNSIGNED (unprom_diff.type)
>     && regular unwidened MINUS_EXPR
>
>     This is like (1) except that the MINUS_EXPR result is sign-extended
>     before the ABS.  Again we rely on having no signed overflow, hence the
>     TYPE_OVERFLOW_UNDEFINED check on the subtraction type.  We can then
>     push the sign-extension of the subtraction result “down” to the inputs
>     of the subtraction.
>
> (3) !TYPE_UNSIGNED (abs_type)
>     && TYPE_PRECISION (unprom_diff.type) == TYPE_PRECISION (abs_type)
>     && MINUS_EXPR of widened inputs
>
>     In other words;
>
>     - the ABS input is signed
>     - the MINUS_EXPR has the same precision as the ABS
>     - the MINUS_EXPR inputs are narrower than the result
>
>     In this case the MINUS_EXPR is known to give correct results for
>     all inputs, due to the extra range.  There's no need to rely on
>     TYPE_OVERFLOW_UNDEFINED.
>
> (4) !TYPE_UNSIGNED (abs_type)
>     && TYPE_PRECISION (unprom_diff.type) < TYPE_PRECISION (abs_type)
>     && !TYPE_UNSIGNED (unprom_diff.type)
>     && MINUS_EXPR of widened inputs
>
>     Like (3), but the subtraction result is further sign-extended
>     (somewhat pointlessly).
>
> Which is a very convoluated way of classifying it, sorry.

There's also the possibility that you see ABSU_EXPR instead of ABS_EXPR.

> I think this means that:
>
> (a) We should reject all cases in TYPE_UNSIGNED (abs_type) is true.
>     Absolute values with unsigned inputs are a niche game, if they're
>     valid at all.
>
> (b) We should require TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd))
>     whenever we see a MINUS_EXPR with unextended inputs (i.e. whenever
>     vect_widened_op_tree fails).
>
> I guess some of this is really a comment about the existing SAD_EXPR code.
>
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                          WIDEN_MINUS_EXPR,
> > +                          false, 2, unprom, half_type))
> > +    {
> > +      if (diff_oprnds)
> > +     {
> > +       diff_oprnds[0] = unprom[0].op;
> > +       diff_oprnds[1] = unprom[1].op;
> > +     }
> > +      return true;
> > +    }
> > +
> > +  // Failed to find a widen operation so we check for a regular MINUS_EXPR
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +    {
> > +      diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +      diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +    }
> > +  else
> > +    return false;
> > +
> > +  *half_type = NULL_TREE;
> > +
> > +  return true;
>
> Very minor, but I think it'd be clearer to put the *half_type assignment
> and return true in the "if" body, and unconditionally return false
> at the end.  That makes it more consistent with the widening case.
>
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1395,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -       && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> >
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                 &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> > -
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                    true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                          false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1423,138 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +     SABD_EXPR<x, y, out>
> > +     UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +             stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +     X = x[i]
> > +     Y = y[i]
> > +     DIFF = X - Y
> > +     DAD = ABS_EXPR<DIFF>
> > +     out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +    {
> > +      tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +      if (!SSA_VAR_P (last_rhs))
> > +     return NULL;
> > +
> > +      abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +      if (!abs_stmt)
> > +     return NULL;
> > +    }
>
> Could you explain why this is needed?  I'd expect the function to be
> called on the cast rhs too, so I wouldn't expect that we'd need to handle
> casts explicitly.
>
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                    false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +    {
> > +      if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +     return NULL;
>
> I wouldn't have expected this to be unecessary.  half_type is supposed
> to be a common type that can hold all values of unprom[0].type and
> unprom[1].type.  We should just be able to do:
>
> > +      tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +      if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +     {
> > +       tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
> > +       vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                            half_type, unprom, vectype);
>
> ...this vect_convert_inputs unconditionally.  We need to check that
> the get_vectype_for_scalar_type call succeeds though.
>
> So does it work as:
>
>   if (half_type)
>     {
>       tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
>       if (!vectype)
>         return false;
>       vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
>                            half_type, unprom, vectype);
>     }
>
> ?
>
> > +     }
> > +      else
> > +     {
> > +       abd_oprnds[0] = diff_oprnds[0];
> > +       abd_oprnds[1] = diff_oprnds[1];
> > +     }
> > +    }
> > +  else
> > +    {
> > +      if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +     return NULL;
>
> AIUI, the !half_type case shouldn't look at unprom, since it's handling
> simple MINUS_EXPRs.  I think we can just delete this "if" statement.
>
> > +
> > +      unprom[0].op = diff_oprnds[0];
> > +      unprom[1].op = diff_oprnds[1];
> > +      tree signed_out = signed_type_for (out_type);
> > +      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
>
> We need to check for success here too.
>
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +     return NULL;
>
> I don't think this is needed.
>
> > +    }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
>
> I also don't think this is needed.  AIUI, the previous code has done
> all the necessary correctness checks.
>
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
>
> I think instead we want the vector types computed above.  That is:
>
> - The ABD should be done on the vector version of half_type
>   if the subtraction was on promoted inputs.  The result of
>   the ABD should then be zero-extended (using vect_convert_output)
>   to out_type.
>
>   In particular, it's the sign of HALF_TYPE that decides whether
>   it's signed or unsigned ABD.
>
> - The ABD should be done on the vector version of signed_outtype
>   if the subtraction was on unpromoted inputs.  We then might need
>   to sign-cast it to outtype, if outtype is unsigned.  We can
>   use vect_convert_output for that too.
>
>   In other words, this case must use signed ABD.
>
> Hope I've got that right...
>
> Thanks,
> Richard
>
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                       OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                             abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6631,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-18 17:59             ` Richard Sandiford
  2023-05-22 13:32               ` Richard Biener
@ 2023-05-23 14:27               ` Oluwatamilore Adebayo
  2023-05-23 14:34                 ` [PATCH 1/2] " Oluwatamilore Adebayo
  1 sibling, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-05-23 14:27 UTC (permalink / raw)
  To: richard.sandiford; +Cc: Oluwatamilore.Adebayo, gcc-patches, richard.guenther

> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> 
> Could you explain the reject_unsigned behaviour?  I'd have expected
> TYPE_OVERFLOW_WRAPS (abs_type) to reject the unsigned case anyway.

When REJECT_UNSIGNED is true, cases wherein the abs type is unsigned or when the
unpromoted diff type is both not equal in precision to the abs type and unsigned
the statement is rejected.

vect_recog_absolute_difference replaces some of the logic in vect_recog_sad_pattern
and is used by vect_recog_abd_pattern.
vect_recog_sad_pattern aborts if the abs type is unsigned or when the unprom
diff type isn't the same precision as abs type and unsigned.
vect_recog_abd_pattern doesn't do the same, so REJECT_UNSIGNED is a flag for this.

I found it to be unnecessary as you suggested, so it's been dropped.

> > +  if (half_type)
> > +    {
> > +      if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +	return NULL;
> 
> I wouldn't have expected this to be unecessary.  half_type is supposed
> to be a common type that can hold all values of unprom[0].type and
> unprom[1].type.  We should just be able to do:
> 
> > +      tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +      if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +	{
> > +	  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
> > +	  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +			       half_type, unprom, vectype);
> 
> ...this vect_convert_inputs unconditionally.  We need to check that
> the get_vectype_for_scalar_type call succeeds though.
> 
> So does it work as:
> 
>   if (half_type)
>     {
>       tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
>       if (!vectype)
>         return false;
>       vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
>                            half_type, unprom, vectype);
>     }
> 
> ?

The proposed solution works.

> > +	}
> > +      else
> > +	{
> > +	  abd_oprnds[0] = diff_oprnds[0];
> > +	  abd_oprnds[1] = diff_oprnds[1];
> > +	}
> > +    }
> > +  else
> > +    {
> > +      if (unprom[0].op && unprom[1].op
> > +	  && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +	  || !SAME_TYPE (unprom[0].type, out_type)))
> > +	return NULL;
> 
> AIUI, the !half_type case shouldn't look at unprom, since it's handling
> simple MINUS_EXPRs.  I think we can just delete this "if" statement.

If statement removed.

> > +      unprom[0].op = diff_oprnds[0];
> > +      unprom[1].op = diff_oprnds[1];
> > +      tree signed_out = signed_type_for (out_type);
> > +      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> 
> We need to check for success here too.

Add a check.

> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +			   signed_out, unprom, signed_out_vectype);
> > +
> > +      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +	return NULL;
> 
> I don't think this is needed.

Statement removed.

> > +    }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> 
> I also don't think this is needed.  AIUI, the previous code has done
> all the necessary correctness checks.

Statements removed.

> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> 
> I think instead we want the vector types computed above.  That is:
> 
> - The ABD should be done on the vector version of half_type
>   if the subtraction was on promoted inputs.  The result of
>   the ABD should then be zero-extended (using vect_convert_output)
>   to out_type.
> 
>   In particular, it's the sign of HALF_TYPE that decides whether
>   it's signed or unsigned ABD.
> 
> - The ABD should be done on the vector version of signed_outtype
>   if the subtraction was on unpromoted inputs.  We then might need
>   to sign-cast it to outtype, if outtype is unsigned.  We can
>   use vect_convert_output for that too.
> 
>   In other words, this case must use signed ABD.

In the half_type case out_type is set to be half_type.

Patch is in the next response.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-05-23 14:27               ` [PATCH] vect: " Oluwatamilore Adebayo
@ 2023-05-23 14:34                 ` Oluwatamilore Adebayo
  2023-05-24  9:48                   ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-05-23 14:34 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 231 +++++++++++++++++++++++++++++++++-----
 4 files changed, 215 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..77c77c6ecb0c29ef764e914e88d1090c45fb9a9e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..3a2248263cf67834a1cb41167a1783a3b6400014 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is unsigned.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
+   ABS_STMT, otherwise it will be set the operations found by
+   vect_widened_op_tree.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				tree diff_oprnds[2])
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g.  from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type)
+      && TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, WIDEN_MINUS_EXPR,
+			    false, 2, unprom, half_type))
+    {
+      if (diff_oprnds)
+	{
+	  diff_oprnds[0] = unprom[0].op;
+	  diff_oprnds[1] = unprom[1].op;
+	}
+      return true;
+    }
+
+  // Failed to find a widen operation so we check for a regular MINUS_EXPR
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+
+  if (diff_oprnds && diff
+      && gimple_assign_rhs_code (diff) == MINUS_EXPR)
+    {
+      diff_oprnds[0] = gimple_assign_rhs1 (diff);
+      diff_oprnds[1] = gimple_assign_rhs2 (diff);
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1395,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
-
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
-
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+  vect_unpromoted_value unprom[2];
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1423,114 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+
+      UABD expressions are used when the input types are
+      narrower than the output types or the output type is narrower
+      than 32 bits
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  tree diff_oprnds[2];
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, diff_oprnds))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree vectype;
+  tree abd_oprnds[2];
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      if (!vectype)
+	return NULL;
+
+      out_type = half_type;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   half_type, unprom, vectype);
+    }
+  else
+    {
+      unprom[0].op = diff_oprnds[0];
+      unprom[1].op = diff_oprnds[1];
+      tree signed_out = signed_type_for (out_type);
+      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      if (!signed_out_vectype)
+	return NULL;
+
+      vectype = signed_out_vectype;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, signed_out_vectype);
+
+      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
+	return NULL;
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
+
+  tree var = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, var);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+  return abd_stmt;
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6607,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-05-23 14:34                 ` [PATCH 1/2] " Oluwatamilore Adebayo
@ 2023-05-24  9:48                   ` Richard Sandiford
  2023-06-06  9:50                     ` [PATCH] vect: " Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Sandiford @ 2023-05-24  9:48 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Thanks for the update.  Mostly LGTM, just some minor things left below.

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b09539776c0056e77f99b10365d0a8747fbc5..3a2248263cf67834a1cb41167a1783a3b6400014 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>  
> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +
> +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> +   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is unsigned.

This line no longer applies.

> +   HALF_TYPE and UNPROM will be set should the statement be found to
> +   be a widened operation.
> +   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
> +   ABS_STMT, otherwise it will be set the operations found by
> +   vect_widened_op_tree.
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type,
> +				vect_unpromoted_value unprom[2],
> +				tree diff_oprnds[2])
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type)
> +      || TYPE_OVERFLOW_WRAPS (abs_type)
> +      || TYPE_UNSIGNED (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)

Nit: should just be one space after "e.g."

> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type)
> +      && TYPE_UNSIGNED (abs_type))

The last line is now redundant, since TYPE_UNSIGNED was checked above.

> +    return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
> +			    MINUS_EXPR, WIDEN_MINUS_EXPR,
> +			    false, 2, unprom, half_type))
> +    {
> +      if (diff_oprnds)
> +	{
> +	  diff_oprnds[0] = unprom[0].op;
> +	  diff_oprnds[1] = unprom[1].op;
> +	}
> +      return true;
> +    }
> +
> +  // Failed to find a widen operation so we check for a regular MINUS_EXPR

Nit: please use /* ... */ for files that currently use that style.

> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +
> +  if (diff_oprnds && diff
> +      && gimple_assign_rhs_code (diff) == MINUS_EXPR)

We also need to check TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)),
since this case exploits undefined behaviour for signed subtraction.

> +    {
> +      diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +      diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +      *half_type = NULL_TREE;
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1308,40 +1395,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> -
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> -
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -						    &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
>  
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +  vect_unpromoted_value unprom[2];
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +				       unprom, NULL))
>      return NULL;
>  
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> -			     false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;
>  
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1363,6 +1423,114 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>  
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +	SABD_EXPR<x, y, out>
> +	UABD_EXPR<x, y, out>
> +
> +      UABD expressions are used when the input types are
> +      narrower than the output types or the output type is narrower
> +      than 32 bits

Isn't it only the first (input types narrower than the output types)?
I guess the second part is referring to the fact that, in C, an operation
on sub-32-bit values will happen in int and then be narrowed back down.
But in gimple that's all explicit, and it's the widening to int that
allows UABD to be used.  (And UABD can then be used regardless of whether
the result is narrowed back down.)

> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +			stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +	out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  vect_unpromoted_value unprom[2];
> +  tree diff_oprnds[2];
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
> +				       unprom, diff_oprnds))
> +    return NULL;
> +
> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> +
> +  tree vectype;
> +  tree abd_oprnds[2];
> +  if (half_type)
> +    {
> +      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> +      if (!vectype)
> +	return NULL;
> +
> +      out_type = half_type;
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			   half_type, unprom, vectype);
> +    }
> +  else
> +    {
> +      unprom[0].op = diff_oprnds[0];
> +      unprom[1].op = diff_oprnds[1];
> +      tree signed_out = signed_type_for (out_type);
> +      tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +      if (!signed_out_vectype)
> +	return NULL;
> +
> +      vectype = signed_out_vectype;

Might as well drop signed_out_vectype and use vectype directly.

> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			   signed_out, unprom, signed_out_vectype);
> +
> +      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> +	return NULL;

This shouldn't be necesary.  The transformation would be correct
even if the subtraction is widened before the ABS, since we already
checked for signed promotion, and since we're only trying SABD.

Thanks,
Richard

> +    }
> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +					  OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> +
> +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +						abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, var);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +  return abd_stmt;
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>  
> @@ -6439,6 +6607,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-05-24  9:48                   ` Richard Sandiford
@ 2023-06-06  9:50                     ` Oluwatamilore Adebayo
  2023-06-06  9:53                       ` [PATCH 1/2] " Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-06  9:50 UTC (permalink / raw)
  To: richard.sandiford; +Cc: gcc-patches, oluwatamilore.adebayo, richard.guenther

> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type)
> > +      && TYPE_UNSIGNED (abs_type))
> 
> The last line is now redundant, since TYPE_UNSIGNED was checked above.

Done.

> > +  // Failed to find a widen operation so we check for a regular MINUS_EXPR
> 
> Nit: please use /* ... */ for files that currently use that style.

Done.

> > +  if (diff_oprnds && diff
> > +      && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> 
> We also need to check TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)),
> since this case exploits undefined behaviour for signed subtraction.

Done.

> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> 
> Isn't it only the first (input types narrower than the output types)?
> I guess the second part is referring to the fact that, in C, an operation
> on sub-32-bit values will happen in int and then be narrowed back down.
> But in gimple that's all explicit, and it's the widening to int that
> allows UABD to be used.  (And UABD can then be used regardless of whether
> the result is narrowed back down.)

Dropped this block as it is no longer applicable. Must've been from a much
earlier version of this patch.

> Might as well drop signed_out_vectype and use vectype directly.

Done.

> > +      if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +	return NULL;
> 
> This shouldn't be necesary.  The transformation would be correct
> even if the subtraction is widened before the ABS, since we already
> checked for signed promotion, and since we're only trying SABD.

Done.

Patch is in the next response.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-06  9:50                     ` [PATCH] vect: " Oluwatamilore Adebayo
@ 2023-06-06  9:53                       ` Oluwatamilore Adebayo
  2023-06-06 12:56                         ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-06  9:53 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 246 +++++++++++++++++++++++++++++++++-----
 4 files changed, 232 insertions(+), 29 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..77c77c6ecb0c29ef764e914e88d1090c45fb9a9e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..96dfeb25cdc307e8a795af1fef41fac1c0195515 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,86 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
+   ABS_STMT, otherwise it will be set the operations found by
+   vect_widened_op_tree.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				gassign **diff_stmt)
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g. from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+
+  if (diff_stmt)
+    *diff_stmt = diff;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, WIDEN_MINUS_EXPR,
+			    false, 2, unprom, half_type))
+    return true;
+
+  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
+  if (diff
+      && gimple_assign_rhs_code (diff) == MINUS_EXPR
+      && (TYPE_UNSIGNED (abs_type) || TYPE_OVERFLOW_UNDEFINED (abs_type)))
+    {
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1388,31 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
+  vect_unpromoted_value unprom[2];
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
+  if (!abs_stmt)
+    {
+      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
+      if (!abd_stmt
+	  || !gimple_call_internal_p (abd_stmt)
+	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	return NULL;
 
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
+      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
+	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+						    &unprom[1]))
+	return NULL;
+
+      half_type = unprom[0].type;
+    }
+  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+					    unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1434,122 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  gassign *diff_stmt;
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, &diff_stmt))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree abd_type = out_type, vectype;
+  tree abd_oprnds[2];
+  bool extend = false;
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      if (!vectype)
+	return NULL;
+
+      abd_type = half_type;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   half_type, unprom, vectype);
+
+      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+    }
+  else
+    {
+      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
+      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
+      tree signed_out = signed_type_for (out_type);
+      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      if (!vectype)
+	return NULL;
+
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, vectype);
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, abd_result);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+
+  if (!extend)
+    return abd_stmt;
+
+  gimple *stmt = abd_stmt;
+  if (!TYPE_UNSIGNED (abd_type))
+    {
+      tree unsign = unsigned_type_for (abd_type);
+      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
+      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
+				  unsign_vectype);
+    }
+
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6626,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-06  9:53                       ` [PATCH 1/2] " Oluwatamilore Adebayo
@ 2023-06-06 12:56                         ` Richard Sandiford
  2023-06-06 14:34                           ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Sandiford @ 2023-06-06 12:56 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
> 	* doc/md.texi (sabd, uabd): Document them.
> 	* internal-fn.def (ABD): Use new optab.
> 	* optabs.def (sabd_optab, uabd_optab): New optabs,
> 	* tree-vect-patterns.cc (vect_recog_absolute_difference):
> 	Recognize the following idiom abs (a - b).
> 	(vect_recog_sad_pattern): Refactor to use
> 	vect_recog_absolute_difference.
> 	(vect_recog_abd_pattern): Use patterns found by
> 	vect_recog_absolute_difference to build a new ABD
> 	internal call.

Thanks.  I was going to push this with a trivial change, namely that:

> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))

...this is no longer needed.

But it turns out that this conflicts with a patch that Andre
pushed yesterday (2f482a07365d9f4a94a56edd13b7f01b8f78b5a0).
Could you rebase on top of that and send the new patch?
It should just be a case of using IFN_WIDEN_MINUS instead of
WIDEN_MINUS_EXPR.

Thanks, and sorry for the hassle.

Richard

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-06 12:56                         ` Richard Sandiford
@ 2023-06-06 14:34                           ` Oluwatamilore Adebayo
  2023-06-08 10:28                             ` [PATCH] vect: " Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-06 14:34 UTC (permalink / raw)
  To: richard.sandiford; +Cc: gcc-patches, oluwatamilore.adebayo, richard.guenther

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 245 +++++++++++++++++++++++++++++++++-----
 4 files changed, 230 insertions(+), 30 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dc102c919352a0328cf86eabceb3a38c41a7e4fd..4f74161196973de96cc21c09960b77f01e0ba613 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -782,6 +782,86 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
+   ABS_STMT, otherwise it will be set the operations found by
+   vect_widened_op_tree.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				gassign **diff_stmt)
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g. from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+
+  if (diff_stmt)
+    *diff_stmt = diff;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
+			    false, 2, unprom, half_type))
+    return true;
+
+  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
+  if (diff
+      && gimple_assign_rhs_code (diff) == MINUS_EXPR
+      && (TYPE_UNSIGNED (abs_type) || TYPE_OVERFLOW_UNDEFINED (abs_type)))
+    {
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1320,41 +1400,31 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
+  vect_unpromoted_value unprom[2];
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
+  if (!abs_stmt)
+    {
+      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
+      if (!abd_stmt
+	  || !gimple_call_internal_p (abd_stmt)
+	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	return NULL;
 
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
+      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
+	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+						    &unprom[1]))
+	return NULL;
+
+      half_type = unprom[0].type;
+    }
+  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+					    unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
-			     IFN_VEC_WIDEN_MINUS,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1376,6 +1446,120 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  gassign *diff_stmt;
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, &diff_stmt))
+    return NULL;
+
+  tree abd_type = out_type, vectype;
+  tree abd_oprnds[2];
+  bool extend = false;
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      if (!vectype)
+	return NULL;
+
+      abd_type = half_type;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   half_type, unprom, vectype);
+
+      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+    }
+  else
+    {
+      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
+      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
+      tree signed_out = signed_type_for (out_type);
+      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      if (!vectype)
+	return NULL;
+
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, vectype);
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, abd_result);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+
+  if (!extend)
+    return abd_stmt;
+
+  gimple *stmt = abd_stmt;
+  if (!TYPE_UNSIGNED (abd_type))
+    {
+      tree unsign = unsigned_type_for (abd_type);
+      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
+      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
+				  unsign_vectype);
+    }
+
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6471,6 +6655,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] vect: Missed opportunity to use [SU]ABD
  2023-06-06 14:34                           ` Oluwatamilore Adebayo
@ 2023-06-08 10:28                             ` Oluwatamilore Adebayo
  2023-06-08 10:31                               ` [PATCH 1/2] " Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-08 10:28 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

New patch to address issue brought up in a different
thread: mptjzwgplp2.fsf@arm.com

> > +  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
> > +  if (diff
> > +      && gimple_assign_rhs_code (diff) == MINUS_EXPR
> > +      && (TYPE_UNSIGNED (abs_type) || TYPE_OVERFLOW_UNDEFINED (abs_type)))
> > +    {
> > +      *half_type = NULL_TREE;
> > +      return true;
> > +    }
> 
> the condition should instead be:
> 
>   if (diff
>       && gimple_assign_rhs_code (diff) == MINUS_EXPR
>       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
>     {
>       *half_type = NULL_TREE;
>       return true;
>     }
> 
> That is, we rely on overflow being undefined, so we need to check
> TYPE_OVERFLOW_UNDEFINED on the type of the subtraction (rather than
> abs_type, which is the type of ABS input, and at this point can be
> different from TREE_TYPE (abs_oprnd)).

I found that doing this alone would get rid of cases which otherwise
should have gone through so I added an extra step such that if this
part fails we'll try to find the unpromoted diff operands and then
try the type overflow check on the types of the unpromoted operands.

Patch is in the next response.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-08 10:28                             ` [PATCH] vect: " Oluwatamilore Adebayo
@ 2023-06-08 10:31                               ` Oluwatamilore Adebayo
  2023-06-13  8:26                                 ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-08 10:31 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 259 +++++++++++++++++++++++++++++++++-----
 4 files changed, 244 insertions(+), 30 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dc102c919352a0328cf86eabceb3a38c41a7e4fd..7296892aaa07da59b8122d29a22a2f583e8ff5aa 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -782,6 +782,100 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_STMT will be set to the MINUS_EXPR
+   statement that precedes the ABS_STMT unless vect_widened_op_tree
+   succeeds.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				gassign **diff_stmt)
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g. from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
+			    false, 2, unprom, half_type))
+    return true;
+
+  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (!diff || gimple_assign_rhs_code (diff) != MINUS_EXPR)
+    return false;
+
+  if (diff_stmt)
+    *diff_stmt = diff;
+
+  if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
+    {
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  vect_unpromoted_value diff0, diff1;
+  if (vect_look_through_possible_promotion (vinfo, gimple_assign_rhs1 (diff),
+					     &diff0)
+      && vect_look_through_possible_promotion (vinfo, gimple_assign_rhs2 (diff),
+						&diff1))
+    {
+      if (TYPE_OVERFLOW_UNDEFINED (diff0.type)
+	  && TYPE_OVERFLOW_UNDEFINED (diff1.type))
+	{
+	  *half_type = NULL_TREE;
+	  return true;
+	}
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1320,41 +1414,31 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
+  vect_unpromoted_value unprom[2];
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
+  if (!abs_stmt)
+    {
+      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
+      if (!abd_stmt
+	  || !gimple_call_internal_p (abd_stmt)
+	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	return NULL;
 
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
+      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
+	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+						    &unprom[1]))
+	return NULL;
+
+      half_type = unprom[0].type;
+    }
+  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+					    unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
-			     IFN_VEC_WIDEN_MINUS,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1376,6 +1460,120 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  gassign *diff_stmt;
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, &diff_stmt))
+    return NULL;
+
+  tree abd_type = out_type, vectype;
+  tree abd_oprnds[2];
+  bool extend = false;
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      if (!vectype)
+	return NULL;
+
+      abd_type = half_type;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   half_type, unprom, vectype);
+
+      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+    }
+  else
+    {
+      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
+      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
+      tree signed_out = signed_type_for (out_type);
+      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      if (!vectype)
+	return NULL;
+
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, vectype);
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, abd_result);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+
+  if (!extend)
+    return abd_stmt;
+
+  gimple *stmt = abd_stmt;
+  if (!TYPE_UNSIGNED (abd_type))
+    {
+      tree unsign = unsigned_type_for (abd_type);
+      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
+      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
+				  unsign_vectype);
+    }
+
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6471,6 +6669,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-08 10:31                               ` [PATCH 1/2] " Oluwatamilore Adebayo
@ 2023-06-13  8:26                                 ` Oluwatamilore Adebayo
  2023-06-14 11:15                                   ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-13  8:26 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 245 +++++++++++++++++++++++++++++++++-----
 4 files changed, 230 insertions(+), 30 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dc102c919352a0328cf86eabceb3a38c41a7e4fd..4b63febc33e90b3caa854404a241afb2f09d755e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -782,6 +782,86 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_STMT will be set to the MINUS_EXPR
+   statement that precedes the ABS_STMT unless vect_widened_op_tree
+   succeeds.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				gassign **diff_stmt)
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g. from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
+			    false, 2, unprom, half_type))
+    return true;
+
+  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (!diff || gimple_assign_rhs_code (diff) != MINUS_EXPR)
+    return false;
+
+  if (diff_stmt)
+    *diff_stmt = diff;
+
+  if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
+    {
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1320,41 +1400,31 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
+  vect_unpromoted_value unprom[2];
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
+  if (!abs_stmt)
+    {
+      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
+      if (!abd_stmt
+	  || !gimple_call_internal_p (abd_stmt)
+	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	return NULL;
 
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
+      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
+	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+						    &unprom[1]))
+	return NULL;
+
+      half_type = unprom[0].type;
+    }
+  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+					    unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
-			     IFN_VEC_WIDEN_MINUS,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1376,6 +1446,120 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  gassign *diff_stmt;
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, &diff_stmt))
+    return NULL;
+
+  tree abd_type = out_type, vectype;
+  tree abd_oprnds[2];
+  bool extend = false;
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      if (!vectype)
+	return NULL;
+
+      abd_type = half_type;
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   half_type, unprom, vectype);
+
+      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+    }
+  else
+    {
+      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
+      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
+      tree signed_out = signed_type_for (out_type);
+      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      if (!vectype)
+	return NULL;
+
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			   signed_out, unprom, vectype);
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, abd_result);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+
+  if (!extend)
+    return abd_stmt;
+
+  gimple *stmt = abd_stmt;
+  if (!TYPE_UNSIGNED (abd_type))
+    {
+      tree unsign = unsigned_type_for (abd_type);
+      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
+      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
+				  unsign_vectype);
+    }
+
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6471,6 +6655,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-13  8:26                                 ` Oluwatamilore Adebayo
@ 2023-06-14 11:15                                   ` Richard Sandiford
  2023-06-14 15:26                                     ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Sandiford @ 2023-06-14 11:15 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
> 	* doc/md.texi (sabd, uabd): Document them.
> 	* internal-fn.def (ABD): Use new optab.
> 	* optabs.def (sabd_optab, uabd_optab): New optabs,
> 	* tree-vect-patterns.cc (vect_recog_absolute_difference):
> 	Recognize the following idiom abs (a - b).
> 	(vect_recog_sad_pattern): Refactor to use
> 	vect_recog_absolute_difference.
> 	(vect_recog_abd_pattern): Use patterns found by
> 	vect_recog_absolute_difference to build a new ABD
> 	internal call.
> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 245 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 230 insertions(+), 30 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>  
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = op1 > op2 ? op1 - op2 : op2 - op1;
> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>  
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +			      sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>  			      savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>  
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index dc102c919352a0328cf86eabceb3a38c41a7e4fd..4b63febc33e90b3caa854404a241afb2f09d755e 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -782,6 +782,86 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>  
> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +
> +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> +   HALF_TYPE and UNPROM will be set should the statement be found to
> +   be a widened operation.
> +   DIFF_STMT will be set to the MINUS_EXPR
> +   statement that precedes the ABS_STMT unless vect_widened_op_tree
> +   succeeds.
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type,
> +				vect_unpromoted_value unprom[2],
> +				gassign **diff_stmt)
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type)
> +      || TYPE_OVERFLOW_WRAPS (abs_type)
> +      || TYPE_UNSIGNED (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g. from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
> +			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
> +			    false, 2, unprom, half_type))
> +    return true;
> +
> +  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (!diff || gimple_assign_rhs_code (diff) != MINUS_EXPR)
> +    return false;
> +
> +  if (diff_stmt)
> +    *diff_stmt = diff;
> +
> +  if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
> +    {
> +      *half_type = NULL_TREE;
> +      return true;
> +    }

An earlier version had:

+  if (diff_oprnds && diff
+      && gimple_assign_rhs_code (diff) == MINUS_EXPR)

which IMO correctly made this path conditional on whether the caller
was prepared to accept a MINUS_EXPR on unwidened inputs.  The code
above instead returns true for such a MINUS_EXPR even if diff_stmt
is null.

So I think this should be:

  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
  if (diff_stmt
      && diff
      && gimple_assign_rhs_code (diff) == MINUS_EXPR
      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
    {
      *diff_stmt = diff;
      *half_type = NULL_TREE;
      return true;
    }

With that change, there would be no need for a separate half_type
check...

> +
> +  return false;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1320,41 +1400,31 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> +  vect_unpromoted_value unprom[2];
>  
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> +  if (!abs_stmt)
> +    {
> +      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
> +      if (!abd_stmt
> +	  || !gimple_call_internal_p (abd_stmt)
> +	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +	return NULL;
>  
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -						    &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
> +      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>  
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> +	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> +						    &unprom[1]))
> +	return NULL;
> +
> +      half_type = unprom[0].type;
> +    }
> +  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +					    unprom, NULL))
>      return NULL;
>  
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> -			     IFN_VEC_WIDEN_MINUS,
> -			     false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;

...here.

>  
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1376,6 +1446,120 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>  
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +	SABD_EXPR<x, y, out>
> +	UABD_EXPR<x, y, out>
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +			stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +	out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  vect_unpromoted_value unprom[2];
> +  gassign *diff_stmt;
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
> +				       unprom, &diff_stmt))
> +    return NULL;
> +
> +  tree abd_type = out_type, vectype;
> +  tree abd_oprnds[2];
> +  bool extend = false;
> +  if (half_type)
> +    {
> +      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> +      if (!vectype)
> +	return NULL;
> +
> +      abd_type = half_type;
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			   half_type, unprom, vectype);
> +
> +      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> +    }
> +  else
> +    {
> +      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
> +      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> +      tree signed_out = signed_type_for (out_type);
> +      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +      if (!vectype)
> +	return NULL;
> +
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +			   signed_out, unprom, vectype);
> +    }

We should postpone doing the vect_convert_inputs until...

> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +					  OPTIMIZE_FOR_SPEED))
> +    return NULL;

...here.  The following should work for both cases:

      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
			   TREE_TYPE (vectype), unprom, vectype);

LGTM otherwise, thanks.

Richard

> +
> +  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +						abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, abd_result);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +
> +  if (!extend)
> +    return abd_stmt;
> +
> +  gimple *stmt = abd_stmt;
> +  if (!TYPE_UNSIGNED (abd_type))
> +    {
> +      tree unsign = unsigned_type_for (abd_type);
> +      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
> +      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
> +				  unsign_vectype);
> +    }
> +
> +  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>  
> @@ -6471,6 +6655,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-14 11:15                                   ` Richard Sandiford
@ 2023-06-14 15:26                                     ` Oluwatamilore Adebayo
  2023-06-15  6:38                                       ` Richard Sandiford
  0 siblings, 1 reply; 24+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-14 15:26 UTC (permalink / raw)
  To: richard.sandiford; +Cc: gcc-patches, oluwatamilore.adebayo, richard.guenther

From: oluade01 <oluwatamilore.adebayo@arm.com>

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 233 +++++++++++++++++++++++++++++++++-----
 4 files changed, 217 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = op1 > op2 ? op1 - op2 : op2 - op1;
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dc102c919352a0328cf86eabceb3a38c41a7e4fd..e2392113bff4065c909aefc760b4c48978b73a5a 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -782,6 +782,83 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+
+   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
+   HALF_TYPE and UNPROM will be set should the statement be found to
+   be a widened operation.
+   DIFF_STMT will be set to the MINUS_EXPR
+   statement that precedes the ABS_STMT unless vect_widened_op_tree
+   succeeds.
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type,
+				vect_unpromoted_value unprom[2],
+				gassign **diff_stmt)
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type)
+      || TYPE_OVERFLOW_WRAPS (abs_type)
+      || TYPE_UNSIGNED (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g. from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
+			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
+			    false, 2, unprom, half_type))
+    return true;
+
+  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (diff_stmt && diff
+      && gimple_assign_rhs_code (diff) == MINUS_EXPR
+      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
+    {
+      *diff_stmt = diff;
+      *half_type = NULL_TREE;
+      return true;
+    }
+
+  return false;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1320,41 +1397,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
+  vect_unpromoted_value unprom[2];
 
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
+  if (!abs_stmt)
+    {
+      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
+      if (!abd_stmt
+	  || !gimple_call_internal_p (abd_stmt)
+	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	return NULL;
 
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
+      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
-    return NULL;
+      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
+	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+						    &unprom[1]))
+	return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
-			     IFN_VEC_WIDEN_MINUS,
-			     false, 2, unprom, &half_type))
+      half_type = unprom[0].type;
+    }
+  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+					    unprom, NULL))
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1376,6 +1440,112 @@ vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+			stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  vect_unpromoted_value unprom[2];
+  gassign *diff_stmt;
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
+				       unprom, &diff_stmt))
+    return NULL;
+
+  tree abd_type = out_type, vectype;
+  tree abd_oprnds[2];
+  bool extend = false;
+  if (half_type)
+    {
+      vectype = get_vectype_for_scalar_type (vinfo, half_type);
+      abd_type = half_type;
+      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+    }
+  else
+    {
+      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
+      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
+      tree signed_out = signed_type_for (out_type);
+      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+    }
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+		       TREE_TYPE (vectype), unprom, vectype);
+
+  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, abd_result);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+
+  if (!extend)
+    return abd_stmt;
+
+  gimple *stmt = abd_stmt;
+  if (!TYPE_UNSIGNED (abd_type))
+    {
+      tree unsign = unsigned_type_for (abd_type);
+      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
+      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
+				  unsign_vectype);
+    }
+
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6471,6 +6641,7 @@ struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] Missed opportunity to use [SU]ABD
  2023-06-14 15:26                                     ` Oluwatamilore Adebayo
@ 2023-06-15  6:38                                       ` Richard Sandiford
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Sandiford @ 2023-06-15  6:38 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
> 	* doc/md.texi (sabd, uabd): Document them.
> 	* internal-fn.def (ABD): Use new optab.
> 	* optabs.def (sabd_optab, uabd_optab): New optabs,
> 	* tree-vect-patterns.cc (vect_recog_absolute_difference):
> 	Recognize the following idiom abs (a - b).
> 	(vect_recog_sad_pattern): Refactor to use
> 	vect_recog_absolute_difference.
> 	(vect_recog_abd_pattern): Use patterns found by
> 	vect_recog_absolute_difference to build a new ABD
> 	internal call.

Thanks, both patches pushed to trunk after bootstrap & regtest on
aarch64-linux-gnu and x86_64-linux-gnu.

Richard

> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 233 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 217 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 6a435eb44610960513e9739ac9ac1e8a27182c10..e11b10d2fca11016232921bc85e47975f700e6c6 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5787,6 +5787,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>  
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = op1 > op2 ? op1 - op2 : op2 - op1;
> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 3ac9d82aace322bd8ef108596e5583daa18c76e3..116965f4830cec8f60642ff011a86b6562e2c509 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -191,6 +191,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>  
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +			      sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>  			      savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 6c064ff4993620067d38742a0bfe0a3efb511069..35b835a6ac56d72417dac8ddfd77a8a7e2475e65 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>  
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index dc102c919352a0328cf86eabceb3a38c41a7e4fd..e2392113bff4065c909aefc760b4c48978b73a5a 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -782,6 +782,83 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>  
> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +
> +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> +   HALF_TYPE and UNPROM will be set should the statement be found to
> +   be a widened operation.
> +   DIFF_STMT will be set to the MINUS_EXPR
> +   statement that precedes the ABS_STMT unless vect_widened_op_tree
> +   succeeds.
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type,
> +				vect_unpromoted_value unprom[2],
> +				gassign **diff_stmt)
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type)
> +      || TYPE_OVERFLOW_WRAPS (abs_type)
> +      || TYPE_UNSIGNED (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g. from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
> +			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
> +			    false, 2, unprom, half_type))
> +    return true;
> +
> +  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_stmt && diff
> +      && gimple_assign_rhs_code (diff) == MINUS_EXPR
> +      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
> +    {
> +      *diff_stmt = diff;
> +      *half_type = NULL_TREE;
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1320,41 +1397,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> +  vect_unpromoted_value unprom[2];
>  
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> +  if (!abs_stmt)
> +    {
> +      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
> +      if (!abd_stmt
> +	  || !gimple_call_internal_p (abd_stmt)
> +	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +	return NULL;
>  
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -						    &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
> +      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>  
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> -    return NULL;
> +      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> +	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> +						    &unprom[1]))
> +	return NULL;
>  
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> -			     IFN_VEC_WIDEN_MINUS,
> -			     false, 2, unprom, &half_type))
> +      half_type = unprom[0].type;
> +    }
> +  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +					    unprom, NULL))
>      return NULL;
>  
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1376,6 +1440,112 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>  
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +	SABD_EXPR<x, y, out>
> +	UABD_EXPR<x, y, out>
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +			stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> +	out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  vect_unpromoted_value unprom[2];
> +  gassign *diff_stmt;
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
> +				       unprom, &diff_stmt))
> +    return NULL;
> +
> +  tree abd_type = out_type, vectype;
> +  tree abd_oprnds[2];
> +  bool extend = false;
> +  if (half_type)
> +    {
> +      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> +      abd_type = half_type;
> +      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> +    }
> +  else
> +    {
> +      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
> +      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> +      tree signed_out = signed_type_for (out_type);
> +      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +    }
> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +					  OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +		       TREE_TYPE (vectype), unprom, vectype);
> +
> +  *type_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +						abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, abd_result);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +
> +  if (!extend)
> +    return abd_stmt;
> +
> +  gimple *stmt = abd_stmt;
> +  if (!TYPE_UNSIGNED (abd_type))
> +    {
> +      tree unsign = unsigned_type_for (abd_type);
> +      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
> +      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
> +				  unsign_vectype);
> +    }
> +
> +  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>  
> @@ -6471,6 +6641,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2023-06-15  6:38 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-09 16:07 [PATCH] vect: Missed opportunity to use [SU]ABD Oluwatamilore Adebayo
2023-05-10  9:01 ` Richard Sandiford
2023-05-10  9:49   ` Richard Biener
2023-05-10  9:51     ` Richard Biener
2023-05-10 15:27       ` Richard Sandiford
2023-05-17 12:21         ` oluwatamilore.adebayo
2023-05-18  8:39           ` [PATCH 1/4] " Oluwatamilore.Adebayo
2023-05-18 17:59             ` Richard Sandiford
2023-05-22 13:32               ` Richard Biener
2023-05-23 14:27               ` [PATCH] vect: " Oluwatamilore Adebayo
2023-05-23 14:34                 ` [PATCH 1/2] " Oluwatamilore Adebayo
2023-05-24  9:48                   ` Richard Sandiford
2023-06-06  9:50                     ` [PATCH] vect: " Oluwatamilore Adebayo
2023-06-06  9:53                       ` [PATCH 1/2] " Oluwatamilore Adebayo
2023-06-06 12:56                         ` Richard Sandiford
2023-06-06 14:34                           ` Oluwatamilore Adebayo
2023-06-08 10:28                             ` [PATCH] vect: " Oluwatamilore Adebayo
2023-06-08 10:31                               ` [PATCH 1/2] " Oluwatamilore Adebayo
2023-06-13  8:26                                 ` Oluwatamilore Adebayo
2023-06-14 11:15                                   ` Richard Sandiford
2023-06-14 15:26                                     ` Oluwatamilore Adebayo
2023-06-15  6:38                                       ` Richard Sandiford
2023-05-10 13:29     ` [PATCH] vect: " Oluwatamilore Adebayo
2023-05-15 12:35       ` Oluwatamilore Adebayo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).