[PATCH 1/2] Mid engine setup [SU]ABDL

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 1/2] Mid engine setup [SU]ABDL
@ 2023-06-26 15:34 Oluwatamilore Adebayo
  2023-06-26 20:50 ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-26 15:34 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, richard.guenther, oluade01

From: oluade01 <oluwatamilore.adebayo@arm.com>

This updates vect_recog_abd_pattern to recognize the widening
variant of absolute difference (ABDL, ABDL2).

gcc/ChangeLog:

	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
	Add IFN_VEC_WIDEN_ABD to the switch statement.
	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
	* optabs.def (vec_widen_sabd_optab,
	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
	vec_widen_uabd_optab,
	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
	New optabs.
	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
	to build a VEC_WIDEN_ABD call if the input precision is smaller
	than the precision of the output.
	(vect_recog_widen_abd_pattern): Should an ABD expression be
	found preceeding an extension, replace the two with a
	VEC_WIDEN_ABD.
---
 gcc/internal-fn.def       |   5 ++
 gcc/optabs.def            |  10 +++
 gcc/tree-vect-patterns.cc | 174 ++++++++++++++++++++++++++++++++------
 3 files changed, 161 insertions(+), 28 deletions(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
 				first,
 				vec_widen_ssub, vec_widen_usub,
 				binary)
+DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
+				ECF_CONST | ECF_NOTHROW,
+				first,
+				vec_widen_sabd, vec_widen_uabd,
+				binary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
 OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
 OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
 OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
+OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
+OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
+OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
+OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
+OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
 OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
 OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
 OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
 OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
 OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
 OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
+OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
+OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
+OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
+OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
+OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
 OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
 OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
 OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index e2392113bff4065c909aefc760b4c48978b73a5a..852c4e99edb19d215c354b666b991c87c48620b4 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
       if (!abd_stmt
 	  || !gimple_call_internal_p (abd_stmt)
-	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	  || gimple_call_num_args (abd_stmt) != 2)
 	return NULL;
 
       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
-	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
-						    &unprom[1]))
+      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
+	{
+	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
+						     &unprom[0])
+	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+							&unprom[1]))
+	    return NULL;
+	}
+      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+	{
+	  unprom[0].op = abd_oprnd0;
+	  unprom[0].type = TREE_TYPE (abd_oprnd0);
+	  unprom[1].op = abd_oprnd1;
+	  unprom[1].type = TREE_TYPE (abd_oprnd1);
+	}
+      else
 	return NULL;
 
       half_type = unprom[0].type;
@@ -1442,16 +1455,25 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
 /* Function vect_recog_abd_pattern
 
-   Try to find the following ABsolute Difference (ABD) pattern:
+   Try to find the following ABsolute Difference (ABD) or
+   ABsolute Difference Long (ABDL) pattern:
 
-     VTYPE x, y, out;
+     VTYPE x, y;
+     WTYPE out;
      type diff;
    loop i in range:
      S1 diff = x[i] - y[i]
      S2 out[i] = ABS_EXPR <diff>;
 
-   where 'type' is a integer and 'VTYPE' is a vector of integers
-   the same size as 'type'
+   where 'VTYPE' and 'WTYPE' are vectors of integers.
+	 'WTYPE' may be wider than 'VTYPE'.
+	 'type' is as wide as 'WTYPE'.
+
+   In the case of ABD, should the precision of 'WTYPE' be wider than
+   'VTYPE' or narrower, conversion operations will succeed the ABD or
+   ABDL expression.
+   In the case of ABDL, the same occurs if the precision of 'WTYPE' is
+   more than twice as wide or narrower.
 
    Input:
 
@@ -1459,12 +1481,14 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
    Output:
 
-   * TYPE_out: The type of the output of this pattern
+   * TYPE_OUT: The type of the output of this pattern
 
    * Return value: A new stmt that will be used to replace the sequence of
-     stmts that constitute the pattern; either SABD or UABD:
+     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
 	SABD_EXPR<x, y, out>
 	UABD_EXPR<x, y, out>
+	SABDL_EXPR<x, y, out>
+	UABDL_EXPR<x, y, out>
  */
 
 static gimple *
@@ -1479,8 +1503,9 @@ vect_recog_abd_pattern (vec_info *vinfo,
 	out[i] = DAD
 
      In which
-      - X, Y, DIFF, DAD all have the same type
-      - x, y, out are all vectors of the same type
+      - X, Y have the same type
+      - x, y are all vectors of the same type
+      - DIFF, DAD, out may be wider than X, Y
   */
 
   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
@@ -1496,37 +1521,74 @@ vect_recog_abd_pattern (vec_info *vinfo,
 				       unprom, &diff_stmt))
     return NULL;
 
-  tree abd_type = out_type, vectype;
-  tree abd_oprnds[2];
+  internal_fn ifn = IFN_ABD;
+  tree abd_in_type, abd_out_type;
   bool extend = false;
+
   if (half_type)
     {
-      vectype = get_vectype_for_scalar_type (vinfo, half_type);
-      abd_type = half_type;
-      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+      abd_in_type = half_type;
+      abd_out_type = abd_in_type;
+
+      if (stmt_vinfo->min_output_precision == TYPE_PRECISION (abd_out_type))
+	extend = TYPE_PRECISION (abd_in_type) < TYPE_PRECISION (out_type);
+      else
+	{
+	  ifn = IFN_VEC_WIDEN_ABD;
+	  abd_out_type
+	    = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
+					      TYPE_UNSIGNED (abd_in_type));
+	  extend = TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type);
+	}
     }
   else
     {
       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
-      tree signed_out = signed_type_for (out_type);
-      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      abd_in_type = signed_type_for (out_type);
+      abd_out_type = abd_in_type;
     }
 
-  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
+  if (!vectype_in || !vectype_out)
+    return NULL;
 
-  if (!vectype
-      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+  if (ifn == IFN_VEC_WIDEN_ABD)
+    {
+      code_helper dummy_code;
+      int dummy_int;
+      auto_vec<tree> dummy_vec;
+      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
+					   vectype_out, vectype_in,
+					   &dummy_code, &dummy_code,
+					   &dummy_int, &dummy_vec))
+	{
+	  /* There are architectures that have the ABD instructions
+	     but not the ABDL instructions.  If we just return NULL here
+	     we will miss an occasion where we should have used ABD.
+	     So we change back to ABD and try again.  */
+	  ifn = IFN_ABD;
+	  abd_out_type = abd_in_type;
+	  extend = true;
+	}
+    }
+
+  if (ifn == IFN_ABD
+      && !direct_internal_fn_supported_p (ifn, vectype_in,
 					  OPTIMIZE_FOR_SPEED))
     return NULL;
 
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree abd_oprnds[2];
   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
-		       TREE_TYPE (vectype), unprom, vectype);
+		       abd_in_type, unprom, vectype_in);
 
   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
 
-  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
-  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
 						abd_oprnds[0], abd_oprnds[1]);
   gimple_call_set_lhs (abd_stmt, abd_result);
   gimple_set_location (abd_stmt, gimple_location (last_stmt));
@@ -1535,15 +1597,15 @@ vect_recog_abd_pattern (vec_info *vinfo,
     return abd_stmt;
 
   gimple *stmt = abd_stmt;
-  if (!TYPE_UNSIGNED (abd_type))
+  if (!TYPE_UNSIGNED (abd_out_type))
     {
-      tree unsign = unsigned_type_for (abd_type);
+      tree unsign = unsigned_type_for (abd_out_type);
       tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
 				  unsign_vectype);
     }
 
-  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
 }
 
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
@@ -1703,6 +1765,61 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
 				      &subtype);
 }
 
+/* Try to detect abd on widened inputs, converting IFN_ABD
+   to IFN_VEC_WIDEN_ABD.  */
+static gimple *
+vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			      tree *type_out)
+{
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
+    return NULL;
+
+  tree last_rhs = gimple_assign_rhs1 (last_stmt);
+  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
+  if (!abs_vinfo)
+    return NULL;
+
+  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
+  if (!abd_pattern_vinfo)
+    return NULL;
+
+  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
+  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+    return NULL;
+
+  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
+
+  tree in_type = TREE_TYPE (abd_oprnd0);
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
+    return NULL;
+
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  code_helper dummy_code;
+  int dummy_int;
+  auto_vec<tree> dummy_vec;
+  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
+				       vectype_out, vectype_in,
+				       &dummy_code, &dummy_code,
+				       &dummy_int, &dummy_vec))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
+
+  *type_out = vectype_out;
+
+  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
+						 abd_oprnd0, abd_oprnd1);
+  gimple_call_set_lhs (abdl_stmt, abdl_result);
+  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
+  return abdl_stmt;
+}
+
 /* Function vect_recog_ctz_ffs_pattern
 
    Try to find the following pattern:
@@ -6670,6 +6787,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },
+  { vect_recog_widen_abd_pattern, "widen_abd" },
   /* These must come after the double widening ones.  */
 };
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-26 15:34 [PATCH 1/2] Mid engine setup [SU]ABDL Oluwatamilore Adebayo
@ 2023-06-26 20:50 ` Richard Sandiford
  2023-06-27  7:46   ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2023-06-26 20:50 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Thanks for doing this.  Generally looks good, but some comments below.

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> 	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> 	Add IFN_VEC_WIDEN_ABD to the switch statement.
> 	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> 	* optabs.def (vec_widen_sabd_optab,
> 	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> 	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> 	vec_widen_uabd_optab,
> 	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> 	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> 	New optabs.
> 	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> 	to build a VEC_WIDEN_ABD call if the input precision is smaller
> 	than the precision of the output.
> 	(vect_recog_widen_abd_pattern): Should an ABD expression be
> 	found preceeding an extension, replace the two with a
> 	VEC_WIDEN_ABD.
> ---
>  gcc/internal-fn.def       |   5 ++
>  gcc/optabs.def            |  10 +++
>  gcc/tree-vect-patterns.cc | 174 ++++++++++++++++++++++++++++++++------
>  3 files changed, 161 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
>  				first,
>  				vec_widen_ssub, vec_widen_usub,
>  				binary)
> +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
> +				ECF_CONST | ECF_NOTHROW,
> +				first,
> +				vec_widen_sabd, vec_widen_uabd,
> +				binary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
>  OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
>  OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
>  OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
> +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
> +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
> +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
> +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
> +OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
>  OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
>  OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
>  OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
> @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
>  OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
>  OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
>  OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
> +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
> +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
> +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
> +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
> +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
>  OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
>  OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
>  OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")

The new optabs need to be documented in doc/md.texi.

> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index e2392113bff4065c909aefc760b4c48978b73a5a..852c4e99edb19d215c354b666b991c87c48620b4 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
>        gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
>        if (!abd_stmt
>  	  || !gimple_call_internal_p (abd_stmt)
> -	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +	  || gimple_call_num_args (abd_stmt) != 2)
>  	return NULL;
>  
>        tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
>        tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>  
> -      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> -	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> -						    &unprom[1]))
> +      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> +	{
> +	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> +						     &unprom[0])
> +	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> +							&unprom[1]))
> +	    return NULL;
> +	}
> +      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> +	{
> +	  unprom[0].op = abd_oprnd0;
> +	  unprom[0].type = TREE_TYPE (abd_oprnd0);
> +	  unprom[1].op = abd_oprnd1;
> +	  unprom[1].type = TREE_TYPE (abd_oprnd1);
> +	}
> +      else
>  	return NULL;
>  
>        half_type = unprom[0].type;
> @@ -1442,16 +1455,25 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>  /* Function vect_recog_abd_pattern
>  
> -   Try to find the following ABsolute Difference (ABD) pattern:
> +   Try to find the following ABsolute Difference (ABD) or
> +   ABsolute Difference Long (ABDL) pattern:

“Long” is a bit of an architecture-specific term.  Maybe just:

   Try to find the following ABsolute Difference (ABD) or
   widening ABD (WIDEN_ABD) pattern:
  
> -     VTYPE x, y, out;
> +     VTYPE x, y;
> +     WTYPE out;
>       type diff;
>     loop i in range:
>       S1 diff = x[i] - y[i]
>       S2 out[i] = ABS_EXPR <diff>;
>  
> -   where 'type' is a integer and 'VTYPE' is a vector of integers
> -   the same size as 'type'
> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
> +	 'WTYPE' may be wider than 'VTYPE'.
> +	 'type' is as wide as 'WTYPE'.

I don't think the existing comment is right about the types.  What we're
matching is scalar code, so VTYPE and (now) WTYPE are integers rather
than vectors of integers.

I think it would be clearer to write:

       S1 diff = (type) x[i] - (type) y[i]
       S2 out[i] = ABS_EXPR <(WTYPE) diff>;

since the promotions happen on the operands.

It'd be good to keep the part about 'type' being an integer.

Rather than:

	 'WTYPE' may be wider than 'VTYPE'.
	 'type' is as wide as 'WTYPE'.

maybe:

	 'type' is no narrower than 'VTYPE' (but may be wider)
	 'WTYPE' is no narrower than 'type' (but may be wider)

> +
> +   In the case of ABD, should the precision of 'WTYPE' be wider than
> +   'VTYPE' or narrower, conversion operations will succeed the ABD or
> +   ABDL expression.
> +   In the case of ABDL, the same occurs if the precision of 'WTYPE' is
> +   more than twice as wide or narrower.

WTYPE can't be narrower than VTYPE though.  I think with the changes
suggested above, the text before this block describes the conditions
in enough detail, and so we can just say:

   WIDEN_ABD exists to optimize the case where WTYPE is at least twice as
   wide as VTYPE.

> @@ -1459,12 +1481,14 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>     Output:
>  
> -   * TYPE_out: The type of the output of this pattern
> +   * TYPE_OUT: The type of the output of this pattern
>  
>     * Return value: A new stmt that will be used to replace the sequence of
> -     stmts that constitute the pattern; either SABD or UABD:
> +     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
>  	SABD_EXPR<x, y, out>
>  	UABD_EXPR<x, y, out>
> +	SABDL_EXPR<x, y, out>
> +	UABDL_EXPR<x, y, out>

SABD_EXPR/UABD_EXPR should be IFN_ABD
SABDL_EXPR/UABDL_EXPR should be IFN_WIDEN_ABD

>   */
>  
>  static gimple *
> @@ -1479,8 +1503,9 @@ vect_recog_abd_pattern (vec_info *vinfo,
>  	out[i] = DAD
>  
>       In which
> -      - X, Y, DIFF, DAD all have the same type
> -      - x, y, out are all vectors of the same type
> +      - X, Y have the same type
> +      - x, y are all vectors of the same type
> +      - DIFF, DAD, out may be wider than X, Y
>    */

Maybe it would be easier to remove this comment, since I think the
comment above the function says enough.

> @@ -1496,37 +1521,74 @@ vect_recog_abd_pattern (vec_info *vinfo,
>  				       unprom, &diff_stmt))
>      return NULL;
>  
> -  tree abd_type = out_type, vectype;
> -  tree abd_oprnds[2];
> +  internal_fn ifn = IFN_ABD;
> +  tree abd_in_type, abd_out_type;
>    bool extend = false;
> +
>    if (half_type)
>      {
> -      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> -      abd_type = half_type;
> -      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> +      abd_in_type = half_type;
> +      abd_out_type = abd_in_type;
> +
> +      if (stmt_vinfo->min_output_precision == TYPE_PRECISION (abd_out_type))
> +	extend = TYPE_PRECISION (abd_in_type) < TYPE_PRECISION (out_type);
> +      else
> +	{
> +	  ifn = IFN_VEC_WIDEN_ABD;
> +	  abd_out_type
> +	    = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> +					      TYPE_UNSIGNED (abd_in_type));
> +	  extend = TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type);
> +	}
>      }
>    else
>      {
>        unprom[0].op = gimple_assign_rhs1 (diff_stmt);
>        unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> -      tree signed_out = signed_type_for (out_type);
> -      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +      abd_in_type = signed_type_for (out_type);
> +      abd_out_type = abd_in_type;
>      }

Rather than have the "extend" variable, how about:

>  
> -  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
> +  if (!vectype_in || !vectype_out)
> +    return NULL;
>  
> -  if (!vectype
> -      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +  if (ifn == IFN_VEC_WIDEN_ABD)
> +    {
> +      code_helper dummy_code;
> +      int dummy_int;
> +      auto_vec<tree> dummy_vec;
> +      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
> +					   vectype_out, vectype_in,
> +					   &dummy_code, &dummy_code,
> +					   &dummy_int, &dummy_vec))
> +	{
> +	  /* There are architectures that have the ABD instructions
> +	     but not the ABDL instructions.  If we just return NULL here
> +	     we will miss an occasion where we should have used ABD.
> +	     So we change back to ABD and try again.  */
> +	  ifn = IFN_ABD;
> +	  abd_out_type = abd_in_type;
> +	  extend = true;
> +	}
> +    }

making this:

  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2)
    {
      tree mid_type
	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
					  TYPE_UNSIGNED (abd_in_type));
      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_vectype);
      code_helper dummy_code;
      int dummy_int;
      auto_vec<tree> dummy_vec;
      if (mid_vectype
	  && supportable_widening_operation (vinfo, IFN_WIDEN_ABD, stmt_vinfo,
					     mid_vectype, vectype_in,
					     &dummy_code, &dummy_code,
					     &dummy_int, &dummy_vec))
	{
	  ifn = IFN_WIDEN_ABD;
	  abd_out_type = mid_type;
	  vectype_out = mid_vectype;
	}
    }

The idea is to keep the assumption that we're using IFN_ABD
until we've proven conclusively otherwise.

I think the later:

  if (!extend)
    return abd_stmt;

should then be deleted, since we should still use vect_convert_output
if abd_out_type has a different sign from out_type.

> +
> +  if (ifn == IFN_ABD
> +      && !direct_internal_fn_supported_p (ifn, vectype_in,
>  					  OPTIMIZE_FOR_SPEED))
>      return NULL;
>  
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree abd_oprnds[2];
>    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> -		       TREE_TYPE (vectype), unprom, vectype);
> +		       abd_in_type, unprom, vectype_in);
>  
>    *type_out = get_vectype_for_scalar_type (vinfo, out_type);
>  
> -  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> -  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
>  						abd_oprnds[0], abd_oprnds[1]);
>    gimple_call_set_lhs (abd_stmt, abd_result);
>    gimple_set_location (abd_stmt, gimple_location (last_stmt));
> @@ -1535,15 +1597,15 @@ vect_recog_abd_pattern (vec_info *vinfo,
>      return abd_stmt;
>  
>    gimple *stmt = abd_stmt;
> -  if (!TYPE_UNSIGNED (abd_type))
> +  if (!TYPE_UNSIGNED (abd_out_type))

And this condition would then be:

  if (TYPE_PRECISION (abd_out_type) == TYPE_PRECISION (abd_in_type)
      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
      && !TYPE_UNSIGNED (abd_out_type))

since:

(a) we don't need to force the type to unsigned if the final
    vect_convert_output is just a sign change

(b) we don't need to force the type to unsigned if abd_out_type
    is large enough to hold the results without loss of precision
    (or loss of sign correctness)

>      {
> -      tree unsign = unsigned_type_for (abd_type);
> +      tree unsign = unsigned_type_for (abd_out_type);
>        tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
>        stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
>  				  unsign_vectype);
>      }
>  
> -  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> +  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
>  }
>  
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> @@ -1703,6 +1765,61 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
>  				      &subtype);
>  }
>  
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> +   to IFN_VEC_WIDEN_ABD.  */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +			      tree *type_out)
> +{
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
> +    return NULL;
> +
> +  tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
> +  if (!abs_vinfo)
> +    return NULL;
> +
> +  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
> +  if (!abd_pattern_vinfo)
> +    return NULL;
> +
> +  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
> +  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +    return NULL;
> +
> +  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> +
> +  tree in_type = TREE_TYPE (abd_oprnd0);
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> +    return NULL;

I think it'd be more natural to test this on TREE_TYPE (last_rhs)
rather than TREE_TYPE (abd_oprnd0), and do it after the assignment
to last_rhs above.

> +
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  code_helper dummy_code;
> +  int dummy_int;
> +  auto_vec<tree> dummy_vec;
> +  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> +				       vectype_out, vectype_in,
> +				       &dummy_code, &dummy_code,
> +				       &dummy_int, &dummy_vec))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> +  *type_out = vectype_out;
> +
> +  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> +						 abd_oprnd0, abd_oprnd1);
> +  gimple_call_set_lhs (abdl_stmt, abdl_result);
> +  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
> +  return abdl_stmt;

I think “widen_abd” would be better than “abdl” here.

Richard

> +}
> +
>  /* Function vect_recog_ctz_ffs_pattern
>  
>     Try to find the following pattern:
> @@ -6670,6 +6787,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_mask_conversion_pattern, "mask_conversion" },
>    { vect_recog_widen_plus_pattern, "widen_plus" },
>    { vect_recog_widen_minus_pattern, "widen_minus" },
> +  { vect_recog_widen_abd_pattern, "widen_abd" },
>    /* These must come after the double widening ones.  */
>  };

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-26 20:50 ` Richard Sandiford
@ 2023-06-27  7:46   ` Richard Sandiford
  2023-06-28 15:07     ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2023-06-27  7:46 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Richard Sandiford <richard.sandiford@arm.com> writes:
>> -     VTYPE x, y, out;
>> +     VTYPE x, y;
>> +     WTYPE out;
>>       type diff;
>>     loop i in range:
>>       S1 diff = x[i] - y[i]
>>       S2 out[i] = ABS_EXPR <diff>;
>>  
>> -   where 'type' is a integer and 'VTYPE' is a vector of integers
>> -   the same size as 'type'
>> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
>> +	 'WTYPE' may be wider than 'VTYPE'.
>> +	 'type' is as wide as 'WTYPE'.
>
> I don't think the existing comment is right about the types.  What we're
> matching is scalar code, so VTYPE and (now) WTYPE are integers rather
> than vectors of integers.

Gah, sorry, I realise now that the point was that VTYPE and WTYPE
are sequences rather than scalars.  But patterns are used for SLP
as well as loops, and the inputs and outputs might not be memory
objects.  So:

> I think it would be clearer to write:
>
>        S1 diff = (type) x[i] - (type) y[i]
>        S2 out[i] = ABS_EXPR <(WTYPE) diff>;
>
> since the promotions happen on the operands.
>
> It'd be good to keep the part about 'type' being an integer.
>
> Rather than:
>
> 	 'WTYPE' may be wider than 'VTYPE'.
> 	 'type' is as wide as 'WTYPE'.
>
> maybe:
>
> 	 'type' is no narrower than 'VTYPE' (but may be wider)
> 	 'WTYPE' is no narrower than 'type' (but may be wider)

...how about:

  TYPE1 x;
  TYPE2 y;
  TYPE3 x_cast = (TYPE3) x;              // widening or no-op
  TYPE3 y_cast = (TYPE3) y;              // widening or no-op
  TYPE3 diff = x_cast - y_cast;
  TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
  TYPE5 abs = ABS(U)_EXPR <diff_cast>;

(based on the comment above vect_recog_widen_op_pattern).

Thanks,
Richard

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-27  7:46   ` Richard Sandiford
@ 2023-06-28 15:07     ` Oluwatamilore Adebayo
  2023-06-28 15:09       ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 9+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-28 15:07 UTC (permalink / raw)
  To: richard.sandiford; +Cc: gcc-patches, oluwatamilore.adebayo, richard.guenther

> The new optabs need to be documented in doc/md.texi.

Done.

> “Long” is a bit of an architecture-specific term.  Maybe just:
> 
>    Try to find the following ABsolute Difference (ABD) or
>    widening ABD (WIDEN_ABD) pattern:

Change made.

> >> -     VTYPE x, y, out;
> >> +     VTYPE x, y;
> >> +     WTYPE out;
> >>       type diff;
> >>     loop i in range:
> >>       S1 diff = x[i] - y[i]
> >>       S2 out[i] = ABS_EXPR <diff>;
> >>  
> >> -   where 'type' is a integer and 'VTYPE' is a vector of integers
> >> -   the same size as 'type'
> >> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
> >> +	 'WTYPE' may be wider than 'VTYPE'.
> >> +	 'type' is as wide as 'WTYPE'.
> >
> > I don't think the existing comment is right about the types.  What we're
> > matching is scalar code, so VTYPE and (now) WTYPE are integers rather
> > than vectors of integers.
> 
> Gah, sorry, I realise now that the point was that VTYPE and WTYPE
> are sequences rather than scalars.  But patterns are used for SLP
> as well as loops, and the inputs and outputs might not be memory
> objects.  So:
> 
> > I think it would be clearer to write:
> >
> >        S1 diff = (type) x[i] - (type) y[i]
> >        S2 out[i] = ABS_EXPR <(WTYPE) diff>;
> >
> > since the promotions happen on the operands.
> >
> > It'd be good to keep the part about 'type' being an integer.
> >
> > Rather than:
> >
> > 	 'WTYPE' may be wider than 'VTYPE'.
> > 	 'type' is as wide as 'WTYPE'.
> >
> > maybe:
> >
> > 	 'type' is no narrower than 'VTYPE' (but may be wider)
> > 	 'WTYPE' is no narrower than 'type' (but may be wider)
> 
> ...how about:
> 
>   TYPE1 x;
>   TYPE2 y;
>   TYPE3 x_cast = (TYPE3) x;              // widening or no-op
>   TYPE3 y_cast = (TYPE3) y;              // widening or no-op
>   TYPE3 diff = x_cast - y_cast;
>   TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
>   TYPE5 abs = ABS(U)_EXPR <diff_cast>;
> 
> (based on the comment above vect_recog_widen_op_pattern).

Done.

> WTYPE can't be narrower than VTYPE though.  I think with the changes
> suggested above, the text before this block describes the conditions
> in enough detail, and so we can just say:
> 
>    WIDEN_ABD exists to optimize the case where WTYPE is at least twice as
>    wide as VTYPE.

Change made.

> SABD_EXPR/UABD_EXPR should be IFN_ABD
> SABDL_EXPR/UABDL_EXPR should be IFN_WIDEN_ABD

Change made.

> Maybe it would be easier to remove this comment, since I think the
> comment above the function says enough.

Done.

> Rather than have the "extend" variable, how about:
> 
> >  
> > -  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> > +  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
> > +  if (!vectype_in || !vectype_out)
> > +    return NULL;
> >  
> > -  if (!vectype
> > -      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +  if (ifn == IFN_VEC_WIDEN_ABD)
> > +    {
> > +      code_helper dummy_code;
> > +      int dummy_int;
> > +      auto_vec<tree> dummy_vec;
> > +      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
> > +					   vectype_out, vectype_in,
> > +					   &dummy_code, &dummy_code,
> > +					   &dummy_int, &dummy_vec))
> > +	{
> > +	  /* There are architectures that have the ABD instructions
> > +	     but not the ABDL instructions.  If we just return NULL here
> > +	     we will miss an occasion where we should have used ABD.
> > +	     So we change back to ABD and try again.  */
> > +	  ifn = IFN_ABD;
> > +	  abd_out_type = abd_in_type;
> > +	  extend = true;
> > +	}
> > +    }
> 
> making this:
> 
>   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2)
>     {
>       tree mid_type
> 	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> 					  TYPE_UNSIGNED (abd_in_type));
>       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_vectype);
>       code_helper dummy_code;
>       int dummy_int;
>       auto_vec<tree> dummy_vec;
>       if (mid_vectype
> 	  && supportable_widening_operation (vinfo, IFN_WIDEN_ABD, stmt_vinfo,
> 					     mid_vectype, vectype_in,
> 					     &dummy_code, &dummy_code,
> 					     &dummy_int, &dummy_vec))
> 	{
> 	  ifn = IFN_WIDEN_ABD;
> 	  abd_out_type = mid_type;
> 	  vectype_out = mid_vectype;
> 	}
>     }
> 
> The idea is to keep the assumption that we're using IFN_ABD
> until we've proven conclusively otherwise.
> 
> I think the later:
> 
>   if (!extend)
>     return abd_stmt;
> 
> should then be deleted, since we should still use vect_convert_output
> if abd_out_type has a different sign from out_type.
> 
> .....
> 
> And this condition would then be:
> 
>   if (TYPE_PRECISION (abd_out_type) == TYPE_PRECISION (abd_in_type)
>       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
>       && !TYPE_UNSIGNED (abd_out_type))
> 
> since:
> 
> (a) we don't need to force the type to unsigned if the final
>     vect_convert_output is just a sign change
> 
> (b) we don't need to force the type to unsigned if abd_out_type
>     is large enough to hold the resu

Done.

> I think it'd be more natural to test this on TREE_TYPE (last_rhs)
> rather than TREE_TYPE (abd_oprnd0), and do it after the assignment
> to last_rhs above.

Done.

> > +  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> > +						 abd_oprnd0, abd_oprnd1);
> > +  gimple_call_set_lhs (abdl_stmt, abdl_result);
> > +  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
> > +  return abdl_stmt;
> 
> I think “widen_abd” would be better than “abdl” here

Done.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-28 15:07     ` Oluwatamilore Adebayo
@ 2023-06-28 15:09       ` Oluwatamilore Adebayo
  2023-06-29 10:14         ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-28 15:09 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This updates vect_recog_abd_pattern to recognize the widening
variant of absolute difference (ABDL, ABDL2).

gcc/ChangeLog:

	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
	Add IFN_VEC_WIDEN_ABD to the switch statement.
	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
	* optabs.def (vec_widen_sabd_optab,
	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
	vec_widen_uabd_optab,
	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
	New optabs.
	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
	to build a VEC_WIDEN_ABD call if the input precision is smaller
	than the precision of the output.
	(vect_recog_widen_abd_pattern): Should an ABD expression be
	found preceeding an extension, replace the two with a
	VEC_WIDEN_ABD.
---
 gcc/doc/md.texi           |  11 ++
 gcc/internal-fn.def       |   5 +
 gcc/optabs.def            |  10 ++
 gcc/tree-vect-patterns.cc | 205 +++++++++++++++++++++++++++++---------
 4 files changed, 183 insertions(+), 48 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index e11b10d2fca11016232921bc85e47975f700e6c6..2ae6182b925d0cf8950dc830d083cf93baf2eaa1 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5617,6 +5617,17 @@ signed/unsigned elements of size S@.  Subtract the high/low elements of 2 from
 1 and widen the resulting elements. Put the N/2 results of size 2*S in the
 output vector (operand 0).
 
+@cindex @code{vec_widen_sabdl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_sabdl_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabdl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabdl_lo_@var{m}} instruction pattern
+@item @samp{vec_widen_uabdl_hi_@var{m}}, @samp{vec_widen_uabdl_lo_@var{m}}
+@itemx @samp{vec_widen_sabdl_hi_@var{m}}, @samp{vec_widen_sabdl_lo_@var{m}}
+Signed/Unsigned widening absolute difference long.  Operands 1 and 2 are
+vectors with N signed/unsigned elements of size S@.  Find the absolute
+difference between 1 and 2 and widen the resulting elements.  Put the N/2
+results of size 2*S in the output vector (operand 0).
+
 @cindex @code{vec_addsub@var{m}3} instruction pattern
 @item @samp{vec_addsub@var{m}3}
 Alternating subtract, add with even lanes doing subtract and odd
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
 				first,
 				vec_widen_ssub, vec_widen_usub,
 				binary)
+DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
+				ECF_CONST | ECF_NOTHROW,
+				first,
+				vec_widen_sabd, vec_widen_uabd,
+				binary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
 OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
 OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
 OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
+OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
+OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
+OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
+OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
+OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
 OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
 OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
 OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
 OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
 OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
 OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
+OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
+OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
+OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
+OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
+OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
 OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
 OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
 OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index e2392113bff4065c909aefc760b4c48978b73a5a..281d7bc2e9945ee415be051f5ec1cce19251fbbf 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
       if (!abd_stmt
 	  || !gimple_call_internal_p (abd_stmt)
-	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	  || gimple_call_num_args (abd_stmt) != 2)
 	return NULL;
 
       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
-	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
-						    &unprom[1]))
+      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
+	{
+	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
+						     &unprom[0])
+	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+							&unprom[1]))
+	    return NULL;
+	}
+      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+	{
+	  unprom[0].op = abd_oprnd0;
+	  unprom[0].type = TREE_TYPE (abd_oprnd0);
+	  unprom[1].op = abd_oprnd1;
+	  unprom[1].type = TREE_TYPE (abd_oprnd1);
+	}
+      else
 	return NULL;
 
       half_type = unprom[0].type;
@@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
 /* Function vect_recog_abd_pattern
 
-   Try to find the following ABsolute Difference (ABD) pattern:
+   Try to find the following ABsolute Difference (ABD) or
+   widening ABD (WIDEN_ABD) pattern:
 
-     VTYPE x, y, out;
-     type diff;
-   loop i in range:
-     S1 diff = x[i] - y[i]
-     S2 out[i] = ABS_EXPR <diff>;
+   TYPE1 x;
+   TYPE2 y;
+   TYPE3 x_cast = (TYPE3) x;		  // widening or no-op
+   TYPE3 y_cast = (TYPE3) y;		  // widening or no-op
+   TYPE3 diff = x_cast - y_cast;
+   TYPE4 diff_cast = (TYPE4) diff;	  // widening or no-op
+   TYPE5 abs = ABS(U)_EXPR <diff_cast>;
 
-   where 'type' is a integer and 'VTYPE' is a vector of integers
-   the same size as 'type'
+   WIDEN_ABD exists to optimize the case where WTYPE is at least
+   twice as wide as VTYPE.
 
    Input:
 
@@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
    Output:
 
-   * TYPE_out: The type of the output of this pattern
+   * TYPE_OUT: The type of the output of this pattern
 
    * Return value: A new stmt that will be used to replace the sequence of
-     stmts that constitute the pattern; either SABD or UABD:
-	SABD_EXPR<x, y, out>
-	UABD_EXPR<x, y, out>
+     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
+	IFN_ABD<x, y, out>
+	IFN_WIDEN_ABD<x, y, out>
  */
 
 static gimple *
 vect_recog_abd_pattern (vec_info *vinfo,
 			stmt_vec_info stmt_vinfo, tree *type_out)
 {
-  /* Look for the following patterns
-	X = x[i]
-	Y = y[i]
-	DIFF = X - Y
-	DAD = ABS_EXPR<DIFF>
-	out[i] = DAD
-
-     In which
-      - X, Y, DIFF, DAD all have the same type
-      - x, y, out are all vectors of the same type
-  */
-
   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
   if (!last_stmt)
     return NULL;
@@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo,
 				       unprom, &diff_stmt))
     return NULL;
 
-  tree abd_type = out_type, vectype;
-  tree abd_oprnds[2];
-  bool extend = false;
+  tree abd_in_type, abd_out_type;
+
   if (half_type)
     {
-      vectype = get_vectype_for_scalar_type (vinfo, half_type);
-      abd_type = half_type;
-      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+      abd_in_type = half_type;
+      abd_out_type = abd_in_type;
     }
   else
     {
       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
-      tree signed_out = signed_type_for (out_type);
-      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      abd_in_type = signed_type_for (out_type);
+      abd_out_type = abd_in_type;
     }
 
-  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
+  if (!vectype_in)
+    return NULL;
 
-  if (!vectype
-      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+  internal_fn ifn = IFN_ABD;
+  tree vectype_out = vectype_in;
+
+  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
+      && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)
+    {
+      tree mid_type
+	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
+					  TYPE_UNSIGNED (abd_in_type));
+      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
+
+      code_helper dummy_code;
+      int dummy_int;
+      auto_vec<tree> dummy_vec;
+      if (mid_vectype
+	  && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
+					     stmt_vinfo, mid_vectype,
+					     vectype_in,
+					     &dummy_code, &dummy_code,
+					     &dummy_int, &dummy_vec))
+	{
+	  ifn = IFN_VEC_WIDEN_ABD;
+	  abd_out_type = mid_type;
+	  vectype_out = mid_vectype;
+	}
+    }
+
+  if (ifn == IFN_ABD
+      && !direct_internal_fn_supported_p (ifn, vectype_in,
 					  OPTIMIZE_FOR_SPEED))
     return NULL;
 
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree abd_oprnds[2];
   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
-		       TREE_TYPE (vectype), unprom, vectype);
+		       abd_in_type, unprom, vectype_in);
 
   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
 
-  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
-  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
 						abd_oprnds[0], abd_oprnds[1]);
   gimple_call_set_lhs (abd_stmt, abd_result);
   gimple_set_location (abd_stmt, gimple_location (last_stmt));
 
-  if (!extend)
-    return abd_stmt;
-
   gimple *stmt = abd_stmt;
-  if (!TYPE_UNSIGNED (abd_type))
+  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
+      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
+      && !TYPE_UNSIGNED (abd_out_type))
     {
-      tree unsign = unsigned_type_for (abd_type);
+      tree unsign = unsigned_type_for (abd_out_type);
       tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
 				  unsign_vectype);
     }
 
-  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
 }
 
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
@@ -1703,6 +1736,81 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
 				      &subtype);
 }
 
+/* Try to detect abd on widened inputs, converting IFN_ABD
+   to IFN_VEC_WIDEN_ABD.  */
+static gimple *
+vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			      tree *type_out)
+{
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
+    return NULL;
+
+  tree last_rhs = gimple_assign_rhs1 (last_stmt);
+
+  tree in_type = TREE_TYPE (last_rhs);
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
+    return NULL;
+
+  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
+  if (!abs_vinfo)
+    return NULL;
+
+  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
+  if (!abd_pattern_vinfo)
+    return NULL;
+
+  gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
+  if (gimple_assign_cast_p (pattern_stmt))
+    {
+       tree op = gimple_assign_rhs1 (pattern_stmt);
+       vect_unpromoted_value unprom;
+       op = vect_look_through_possible_promotion (vinfo, op, &unprom);
+
+      if (!op)
+	return NULL;
+
+      abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
+      if (!abd_pattern_vinfo)
+	return NULL;
+
+      pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
+    }
+
+  gcall *abd_stmt = dyn_cast <gcall *> (pattern_stmt);
+  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+    return NULL;
+
+  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
+  if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
+    return NULL;
+
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  code_helper dummy_code;
+  int dummy_int;
+  auto_vec<tree> dummy_vec;
+  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
+				       vectype_out, vectype_in,
+				       &dummy_code, &dummy_code,
+				       &dummy_int, &dummy_vec))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
+
+  *type_out = vectype_out;
+
+  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
+						      abd_oprnd0, abd_oprnd1);
+  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
+  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
+  return widen_abd_stmt;
+}
+
 /* Function vect_recog_ctz_ffs_pattern
 
    Try to find the following pattern:
@@ -6670,6 +6778,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },
+  { vect_recog_widen_abd_pattern, "widen_abd" },
   /* These must come after the double widening ones.  */
 };
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-28 15:09       ` Oluwatamilore Adebayo
@ 2023-06-29 10:14         ` Richard Sandiford
  2023-06-30  8:23           ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2023-06-29 10:14 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> 	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> 	Add IFN_VEC_WIDEN_ABD to the switch statement.
> 	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> 	* optabs.def (vec_widen_sabd_optab,
> 	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> 	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> 	vec_widen_uabd_optab,
> 	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> 	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> 	New optabs.
> 	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> 	to build a VEC_WIDEN_ABD call if the input precision is smaller
> 	than the precision of the output.
> 	(vect_recog_widen_abd_pattern): Should an ABD expression be
> 	found preceeding an extension, replace the two with a
> 	VEC_WIDEN_ABD.
> ---
>  gcc/doc/md.texi           |  11 ++
>  gcc/internal-fn.def       |   5 +
>  gcc/optabs.def            |  10 ++
>  gcc/tree-vect-patterns.cc | 205 +++++++++++++++++++++++++++++---------
>  4 files changed, 183 insertions(+), 48 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index e11b10d2fca11016232921bc85e47975f700e6c6..2ae6182b925d0cf8950dc830d083cf93baf2eaa1 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5617,6 +5617,17 @@ signed/unsigned elements of size S@.  Subtract the high/low elements of 2 from
>  1 and widen the resulting elements. Put the N/2 results of size 2*S in the
>  output vector (operand 0).
>  
> +@cindex @code{vec_widen_sabdl_hi_@var{m}} instruction pattern
> +@cindex @code{vec_widen_sabdl_lo_@var{m}} instruction pattern
> +@cindex @code{vec_widen_uabdl_hi_@var{m}} instruction pattern
> +@cindex @code{vec_widen_uabdl_lo_@var{m}} instruction pattern
> +@item @samp{vec_widen_uabdl_hi_@var{m}}, @samp{vec_widen_uabdl_lo_@var{m}}
> +@itemx @samp{vec_widen_sabdl_hi_@var{m}}, @samp{vec_widen_sabdl_lo_@var{m}}

The optabs don't have the trailing “l” (long).  (Which is a good thing!)

The list should include the even/odd patterns as well.

> +Signed/Unsigned widening absolute difference long.  Operands 1 and 2 are

Similarly no “long” here.

> +vectors with N signed/unsigned elements of size S@.  Find the absolute
> +difference between 1 and 2 and widen the resulting elements.  Put the N/2

Maybe “operands 1 and 2”, or just “them”.

> +results of size 2*S in the output vector (operand 0).
> +
>  @cindex @code{vec_addsub@var{m}3} instruction pattern
>  @item @samp{vec_addsub@var{m}3}
>  Alternating subtract, add with even lanes doing subtract and odd
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
>  				first,
>  				vec_widen_ssub, vec_widen_usub,
>  				binary)
> +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
> +				ECF_CONST | ECF_NOTHROW,
> +				first,
> +				vec_widen_sabd, vec_widen_uabd,
> +				binary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
>  OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
>  OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
>  OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
> +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
> +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
> +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
> +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
> +OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
>  OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
>  OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
>  OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
> @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
>  OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
>  OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
>  OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
> +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
> +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
> +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
> +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
> +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
>  OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
>  OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
>  OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index e2392113bff4065c909aefc760b4c48978b73a5a..281d7bc2e9945ee415be051f5ec1cce19251fbbf 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
>        gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
>        if (!abd_stmt
>  	  || !gimple_call_internal_p (abd_stmt)
> -	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +	  || gimple_call_num_args (abd_stmt) != 2)
>  	return NULL;
>  
>        tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
>        tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>  
> -      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> -	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> -						    &unprom[1]))
> +      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> +	{
> +	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> +						     &unprom[0])
> +	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> +							&unprom[1]))
> +	    return NULL;
> +	}
> +      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> +	{
> +	  unprom[0].op = abd_oprnd0;
> +	  unprom[0].type = TREE_TYPE (abd_oprnd0);
> +	  unprom[1].op = abd_oprnd1;
> +	  unprom[1].type = TREE_TYPE (abd_oprnd1);
> +	}
> +      else
>  	return NULL;
>  
>        half_type = unprom[0].type;
> @@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>  /* Function vect_recog_abd_pattern
>  
> -   Try to find the following ABsolute Difference (ABD) pattern:
> +   Try to find the following ABsolute Difference (ABD) or
> +   widening ABD (WIDEN_ABD) pattern:
>  
> -     VTYPE x, y, out;
> -     type diff;
> -   loop i in range:
> -     S1 diff = x[i] - y[i]
> -     S2 out[i] = ABS_EXPR <diff>;
> +   TYPE1 x;
> +   TYPE2 y;
> +   TYPE3 x_cast = (TYPE3) x;		  // widening or no-op
> +   TYPE3 y_cast = (TYPE3) y;		  // widening or no-op
> +   TYPE3 diff = x_cast - y_cast;
> +   TYPE4 diff_cast = (TYPE4) diff;	  // widening or no-op
> +   TYPE5 abs = ABS(U)_EXPR <diff_cast>;
>  
> -   where 'type' is a integer and 'VTYPE' is a vector of integers
> -   the same size as 'type'
> +   WIDEN_ABD exists to optimize the case where WTYPE is at least
> +   twice as wide as VTYPE.

Sorry, my fault.  I was using the original type names in this
suggestion, rather than the TYPE1…TYPE5 ones.  Should be:

   WIDEN_ABD exists to optimize the case where TYPE4 is at least
   twice as wide as TYPE3.

>  
>     Input:
>  
> @@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>     Output:
>  
> -   * TYPE_out: The type of the output of this pattern
> +   * TYPE_OUT: The type of the output of this pattern
>  
>     * Return value: A new stmt that will be used to replace the sequence of
> -     stmts that constitute the pattern; either SABD or UABD:
> -	SABD_EXPR<x, y, out>
> -	UABD_EXPR<x, y, out>
> +     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
> +	IFN_ABD<x, y, out>
> +	IFN_WIDEN_ABD<x, y, out>

Lingering use of “L” suffixes here.  Maybe:

     stmts that constitute the pattern, principally:
	out = IFN_ABD (x, y)
	out = IFN_WIDEN_ABD (x, y)
>   */
>  
>  static gimple *
>  vect_recog_abd_pattern (vec_info *vinfo,
>  			stmt_vec_info stmt_vinfo, tree *type_out)
>  {
> -  /* Look for the following patterns
> -	X = x[i]
> -	Y = y[i]
> -	DIFF = X - Y
> -	DAD = ABS_EXPR<DIFF>
> -	out[i] = DAD
> -
> -     In which
> -      - X, Y, DIFF, DAD all have the same type
> -      - x, y, out are all vectors of the same type
> -  */
> -
>    gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
>    if (!last_stmt)
>      return NULL;
> @@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo,
>  				       unprom, &diff_stmt))
>      return NULL;
>  
> -  tree abd_type = out_type, vectype;
> -  tree abd_oprnds[2];
> -  bool extend = false;
> +  tree abd_in_type, abd_out_type;
> +
>    if (half_type)
>      {
> -      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> -      abd_type = half_type;
> -      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> +      abd_in_type = half_type;
> +      abd_out_type = abd_in_type;
>      }
>    else
>      {
>        unprom[0].op = gimple_assign_rhs1 (diff_stmt);
>        unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> -      tree signed_out = signed_type_for (out_type);
> -      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +      abd_in_type = signed_type_for (out_type);
> +      abd_out_type = abd_in_type;
>      }
>  
> -  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> +  if (!vectype_in)
> +    return NULL;
>  
> -  if (!vectype
> -      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +  internal_fn ifn = IFN_ABD;
> +  tree vectype_out = vectype_in;
> +
> +  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
> +      && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)

Sorry for not noticing last time, but I think the second condition
would be more natural as:

      && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)

(There's no distinction between abs_in_type and abs_out_type at this point,
so it seems clearer to use the same value in both conditions.)

> +    {
> +      tree mid_type
> +	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> +					  TYPE_UNSIGNED (abd_in_type));
> +      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
> +
> +      code_helper dummy_code;
> +      int dummy_int;
> +      auto_vec<tree> dummy_vec;
> +      if (mid_vectype
> +	  && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
> +					     stmt_vinfo, mid_vectype,
> +					     vectype_in,
> +					     &dummy_code, &dummy_code,
> +					     &dummy_int, &dummy_vec))
> +	{
> +	  ifn = IFN_VEC_WIDEN_ABD;
> +	  abd_out_type = mid_type;
> +	  vectype_out = mid_vectype;
> +	}
> +    }
> +
> +  if (ifn == IFN_ABD
> +      && !direct_internal_fn_supported_p (ifn, vectype_in,
>  					  OPTIMIZE_FOR_SPEED))
>      return NULL;
>  
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree abd_oprnds[2];
>    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> -		       TREE_TYPE (vectype), unprom, vectype);
> +		       abd_in_type, unprom, vectype_in);
>  
>    *type_out = get_vectype_for_scalar_type (vinfo, out_type);
>  
> -  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> -  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
>  						abd_oprnds[0], abd_oprnds[1]);
>    gimple_call_set_lhs (abd_stmt, abd_result);
>    gimple_set_location (abd_stmt, gimple_location (last_stmt));
>  
> -  if (!extend)
> -    return abd_stmt;
> -
>    gimple *stmt = abd_stmt;
> -  if (!TYPE_UNSIGNED (abd_type))
> +  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
> +      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
> +      && !TYPE_UNSIGNED (abd_out_type))
>      {
> -      tree unsign = unsigned_type_for (abd_type);
> +      tree unsign = unsigned_type_for (abd_out_type);
>        tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
>        stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
>  				  unsign_vectype);
>      }
>  
> -  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> +  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
>  }
>  
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> @@ -1703,6 +1736,81 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
>  				      &subtype);
>  }
>  
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> +   to IFN_VEC_WIDEN_ABD.  */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +			      tree *type_out)
> +{
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt || !gimple_assign_cast_p (last_stmt))

I think this should be:

  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))

gimple_assign_cast_p is more general, and allows conversions
between integral and non-integral types.

> +    return NULL;
> +
> +  tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +
> +  tree in_type = TREE_TYPE (last_rhs);
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> +    return NULL;

I think this also needs to require TYPE_UNSIGNED (in_type):

  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
      || !TYPE_UNSIGNED (in_type))
    return NULL;

That is, the extension has to be a zero extension rather than
a sign extension.

For example:

  int32_t a, b, c;
  int64_t d;

  c = IFN_ABD (a, b);
  d = (int64_t) c;

sign-extends the ABD result to 64 bits, and so a == INT_MAX
&& b == INT_MIN gives:

  c = -1    (UINT_MAX converted to signed)
  d = -1

But IFN_WIDEN_ABD would give d == UINT_MAX instead.

> +
> +  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
> +  if (!abs_vinfo)
> +    return NULL;
> +
> +  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
> +  if (!abd_pattern_vinfo)
> +    return NULL;
> +
> +  gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> +  if (gimple_assign_cast_p (pattern_stmt))
> +    {
> +       tree op = gimple_assign_rhs1 (pattern_stmt);
> +       vect_unpromoted_value unprom;
> +       op = vect_look_through_possible_promotion (vinfo, op, &unprom);
> +
> +      if (!op)
> +	return NULL;
> +
> +      abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> +      if (!abd_pattern_vinfo)
> +	return NULL;
> +
> +      pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> +    }

I think the code quoted above reduces to:

  vect_unpromoted_value unprom;
  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    return NULL;

  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
  if (!abd_pattern_vinfo)
    return NULL;
  abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);

> +
> +  gcall *abd_stmt = dyn_cast <gcall *> (pattern_stmt);
> +  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +    return NULL;
> +
> +  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> +  if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
> +    return NULL;

With the changes above, this check would not be necessary.

LGTM otherwise, thanks.

Richard

> +
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  code_helper dummy_code;
> +  int dummy_int;
> +  auto_vec<tree> dummy_vec;
> +  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> +				       vectype_out, vectype_in,
> +				       &dummy_code, &dummy_code,
> +				       &dummy_int, &dummy_vec))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> +  *type_out = vectype_out;
> +
> +  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> +						      abd_oprnd0, abd_oprnd1);
> +  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
> +  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
> +  return widen_abd_stmt;
> +}
> +
>  /* Function vect_recog_ctz_ffs_pattern
>  
>     Try to find the following pattern:
> @@ -6670,6 +6778,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_mask_conversion_pattern, "mask_conversion" },
>    { vect_recog_widen_plus_pattern, "widen_plus" },
>    { vect_recog_widen_minus_pattern, "widen_minus" },
> +  { vect_recog_widen_abd_pattern, "widen_abd" },
>    /* These must come after the double widening ones.  */
>  };

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-29 10:14         ` Richard Sandiford
@ 2023-06-30  8:23           ` Oluwatamilore Adebayo
  2023-06-30  8:26             ` Oluwatamilore Adebayo
  0 siblings, 1 reply; 9+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-30  8:23 UTC (permalink / raw)
  To: richard.sandiford; +Cc: gcc-patches, oluwatamilore.adebayo, richard.guenther

> Sorry, my fault.  I was using the original type names in this
> suggestion, rather than the TYPE1…TYPE5 ones.  Should be:
> 
>    WIDEN_ABD exists to optimize the case where TYPE4 is at least
>    twice as wide as TYPE3.

Change made.

> Lingering use of “L” suffixes here.  Maybe:
> 
>      stmts that constitute the pattern, principally:
> 	out = IFN_ABD (x, y)
> 	out = IFN_WIDEN_ABD (x, y)

Change made.

> > +  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
> > +      && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)
> 
> Sorry for not noticing last time, but I think the second condition
> would be more natural as:
> 
>       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
> 
> (There's no distinction between abs_in_type and abs_out_type at this point,
> so it seems clearer to use the same value in both conditions.)

Change made.

> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
> 
> I think this should be:
> 
>   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
> 
> gimple_assign_cast_p is more general, and allows conversions
> between integral and non-integral types.

Change made.

> > +  tree in_type = TREE_TYPE (last_rhs);
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> > +    return NULL;
> 
> I think this also needs to require TYPE_UNSIGNED (in_type):
> 
>   if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
>       || !TYPE_UNSIGNED (in_type))
>     return NULL;
> 
> That is, the extension has to be a zero extension rather than
> a sign extension.
> 
> For example:
> 
>   int32_t a, b, c;
>   int64_t d;
> 
>   c = IFN_ABD (a, b);
>   d = (int64_t) c;
> 
> sign-extends the ABD result to 64 bits, and so a == INT_MAX
> && b == INT_MIN gives:
> 
>   c = -1    (UINT_MAX converted to signed)
>   d = -1
> 
> But IFN_WIDEN_ABD would give d == UINT_MAX instead.

Change made.

> > +  gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> > +  if (gimple_assign_cast_p (pattern_stmt))
> > +    {
> > +       tree op = gimple_assign_rhs1 (pattern_stmt);
> > +       vect_unpromoted_value unprom;
> > +       op = vect_look_through_possible_promotion (vinfo, op, &unprom);
> > +
> > +      if (!op)
> > +	return NULL;
> > +
> > +      abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> > +      if (!abd_pattern_vinfo)
> > +	return NULL;
> > +
> > +      pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> > +    }
> 
> I think the code quoted above reduces to:
> 
>   vect_unpromoted_value unprom;
>   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
>   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
>     return NULL;
> 
>   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
>   if (!abd_pattern_vinfo)
>     return NULL;
>   abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
> 
> ...
> 
> > +  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> > +  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> > +  if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
> > +    return NULL;
> 
> With the changes above, this check would not be necessary.

Both changes made.

Updated patch will be in the next email.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-30  8:23           ` Oluwatamilore Adebayo
@ 2023-06-30  8:26             ` Oluwatamilore Adebayo
  2023-06-30 11:28               ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Oluwatamilore Adebayo @ 2023-06-30  8:26 UTC (permalink / raw)
  To: oluwatamilore.adebayo; +Cc: gcc-patches, richard.guenther, richard.sandiford

From: oluade01 <oluwatamilore.adebayo@arm.com>

This updates vect_recog_abd_pattern to recognize the widening
variant of absolute difference (ABDL, ABDL2).

gcc/ChangeLog:

	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
	Add IFN_VEC_WIDEN_ABD to the switch statement.
	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
	* optabs.def (vec_widen_sabd_optab,
	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
	vec_widen_uabd_optab,
	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
	New optabs.
	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
	to build a VEC_WIDEN_ABD call if the input precision is smaller
	than the precision of the output.
	(vect_recog_widen_abd_pattern): Should an ABD expression be
	found preceeding an extension, replace the two with a
	VEC_WIDEN_ABD.
---
 gcc/doc/md.texi           |  17 ++++
 gcc/internal-fn.def       |   5 +
 gcc/optabs.def            |  10 ++
 gcc/tree-vect-patterns.cc | 190 ++++++++++++++++++++++++++++----------
 4 files changed, 174 insertions(+), 48 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index e11b10d2fca11016232921bc85e47975f700e6c6..f518d9ed6e170b2e2b9557fffed2cf47aa02633d 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5617,6 +5617,23 @@ signed/unsigned elements of size S@.  Subtract the high/low elements of 2 from
 1 and widen the resulting elements. Put the N/2 results of size 2*S in the
 output vector (operand 0).
 
+@cindex @code{vec_widen_sabd_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_sabd_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_sabd_odd_@var{m}} instruction pattern
+@cindex @code{vec_widen_sabd_even_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_odd_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_even_@var{m}} instruction pattern
+@item @samp{vec_widen_uabd_hi_@var{m}}, @samp{vec_widen_uabd_lo_@var{m}}
+@itemx @samp{vec_widen_uabd_odd_@var{m}}, @samp{vec_widen_uabd_even_@var{m}}
+@itemx @samp{vec_widen_sabd_hi_@var{m}}, @samp{vec_widen_sabd_lo_@var{m}}
+@itemx @samp{vec_widen_sabd_odd_@var{m}}, @samp{vec_widen_sabd_even_@var{m}}
+Signed/Unsigned widening absolute difference.  Operands 1 and 2 are
+vectors with N signed/unsigned elements of size S@.  Find the absolute
+difference between operands 1 and 2 and widen the resulting elements.
+Put the N/2 results of size 2*S in the output vector (operand 0).
+
 @cindex @code{vec_addsub@var{m}3} instruction pattern
 @item @samp{vec_addsub@var{m}3}
 Alternating subtract, add with even lanes doing subtract and odd
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
 				first,
 				vec_widen_ssub, vec_widen_usub,
 				binary)
+DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
+				ECF_CONST | ECF_NOTHROW,
+				first,
+				vec_widen_sabd, vec_widen_uabd,
+				binary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
 OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
 OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
 OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
+OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
+OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
+OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
+OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
+OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
 OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
 OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
 OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
 OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
 OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
 OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
+OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
+OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
+OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
+OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
+OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
 OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
 OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
 OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index e2392113bff4065c909aefc760b4c48978b73a5a..7a0c65563ec1ecccdfc91d38bc32a044d0767a83 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
       if (!abd_stmt
 	  || !gimple_call_internal_p (abd_stmt)
-	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	  || gimple_call_num_args (abd_stmt) != 2)
 	return NULL;
 
       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
-	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
-						    &unprom[1]))
+      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
+	{
+	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
+						     &unprom[0])
+	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+							&unprom[1]))
+	    return NULL;
+	}
+      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+	{
+	  unprom[0].op = abd_oprnd0;
+	  unprom[0].type = TREE_TYPE (abd_oprnd0);
+	  unprom[1].op = abd_oprnd1;
+	  unprom[1].type = TREE_TYPE (abd_oprnd1);
+	}
+      else
 	return NULL;
 
       half_type = unprom[0].type;
@@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
 /* Function vect_recog_abd_pattern
 
-   Try to find the following ABsolute Difference (ABD) pattern:
+   Try to find the following ABsolute Difference (ABD) or
+   widening ABD (WIDEN_ABD) pattern:
 
-     VTYPE x, y, out;
-     type diff;
-   loop i in range:
-     S1 diff = x[i] - y[i]
-     S2 out[i] = ABS_EXPR <diff>;
+   TYPE1 x;
+   TYPE2 y;
+   TYPE3 x_cast = (TYPE3) x;		  // widening or no-op
+   TYPE3 y_cast = (TYPE3) y;		  // widening or no-op
+   TYPE3 diff = x_cast - y_cast;
+   TYPE4 diff_cast = (TYPE4) diff;	  // widening or no-op
+   TYPE5 abs = ABS(U)_EXPR <diff_cast>;
 
-   where 'type' is a integer and 'VTYPE' is a vector of integers
-   the same size as 'type'
+   WIDEN_ABD exists to optimize the case where TYPE4 is at least
+   twice as wide as TYPE3.
 
    Input:
 
@@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo,
 
    Output:
 
-   * TYPE_out: The type of the output of this pattern
+   * TYPE_OUT: The type of the output of this pattern
 
    * Return value: A new stmt that will be used to replace the sequence of
-     stmts that constitute the pattern; either SABD or UABD:
-	SABD_EXPR<x, y, out>
-	UABD_EXPR<x, y, out>
+     stmts that constitute the pattern, principally:
+	out = IFN_ABD (x, y)
+	out = IFN_WIDEN_ABD (x, y)
  */
 
 static gimple *
 vect_recog_abd_pattern (vec_info *vinfo,
 			stmt_vec_info stmt_vinfo, tree *type_out)
 {
-  /* Look for the following patterns
-	X = x[i]
-	Y = y[i]
-	DIFF = X - Y
-	DAD = ABS_EXPR<DIFF>
-	out[i] = DAD
-
-     In which
-      - X, Y, DIFF, DAD all have the same type
-      - x, y, out are all vectors of the same type
-  */
-
   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
   if (!last_stmt)
     return NULL;
@@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo,
 				       unprom, &diff_stmt))
     return NULL;
 
-  tree abd_type = out_type, vectype;
-  tree abd_oprnds[2];
-  bool extend = false;
+  tree abd_in_type, abd_out_type;
+
   if (half_type)
     {
-      vectype = get_vectype_for_scalar_type (vinfo, half_type);
-      abd_type = half_type;
-      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+      abd_in_type = half_type;
+      abd_out_type = abd_in_type;
     }
   else
     {
       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
-      tree signed_out = signed_type_for (out_type);
-      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      abd_in_type = signed_type_for (out_type);
+      abd_out_type = abd_in_type;
     }
 
-  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
+  if (!vectype_in)
+    return NULL;
 
-  if (!vectype
-      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+  internal_fn ifn = IFN_ABD;
+  tree vectype_out = vectype_in;
+
+  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
+      && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
+    {
+      tree mid_type
+	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
+					  TYPE_UNSIGNED (abd_in_type));
+      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
+
+      code_helper dummy_code;
+      int dummy_int;
+      auto_vec<tree> dummy_vec;
+      if (mid_vectype
+	  && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
+					     stmt_vinfo, mid_vectype,
+					     vectype_in,
+					     &dummy_code, &dummy_code,
+					     &dummy_int, &dummy_vec))
+	{
+	  ifn = IFN_VEC_WIDEN_ABD;
+	  abd_out_type = mid_type;
+	  vectype_out = mid_vectype;
+	}
+    }
+
+  if (ifn == IFN_ABD
+      && !direct_internal_fn_supported_p (ifn, vectype_in,
 					  OPTIMIZE_FOR_SPEED))
     return NULL;
 
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree abd_oprnds[2];
   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
-		       TREE_TYPE (vectype), unprom, vectype);
+		       abd_in_type, unprom, vectype_in);
 
   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
 
-  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
-  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
 						abd_oprnds[0], abd_oprnds[1]);
   gimple_call_set_lhs (abd_stmt, abd_result);
   gimple_set_location (abd_stmt, gimple_location (last_stmt));
 
-  if (!extend)
-    return abd_stmt;
-
   gimple *stmt = abd_stmt;
-  if (!TYPE_UNSIGNED (abd_type))
+  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
+      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
+      && !TYPE_UNSIGNED (abd_out_type))
     {
-      tree unsign = unsigned_type_for (abd_type);
+      tree unsign = unsigned_type_for (abd_out_type);
       tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
 				  unsign_vectype);
     }
 
-  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
 }
 
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
@@ -1703,6 +1736,66 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
 				      &subtype);
 }
 
+/* Try to detect abd on widened inputs, converting IFN_ABD
+   to IFN_VEC_WIDEN_ABD.  */
+static gimple *
+vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			      tree *type_out)
+{
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
+    return NULL;
+
+  tree last_rhs = gimple_assign_rhs1 (last_stmt);
+
+  tree in_type = TREE_TYPE (last_rhs);
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
+      || !TYPE_UNSIGNED (in_type))
+    return NULL;
+
+  vect_unpromoted_value unprom;
+  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
+  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
+    return NULL;
+
+  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
+  if (!abd_pattern_vinfo)
+    return NULL;
+
+  abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
+  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
+  if (!abd_stmt
+      || !gimple_call_internal_p (abd_stmt)
+      || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+    return NULL;
+
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  code_helper dummy_code;
+  int dummy_int;
+  auto_vec<tree> dummy_vec;
+  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
+				       vectype_out, vectype_in,
+				       &dummy_code, &dummy_code,
+				       &dummy_int, &dummy_vec))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
+
+  *type_out = vectype_out;
+
+  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
+  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
+						      abd_oprnd0, abd_oprnd1);
+  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
+  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
+  return widen_abd_stmt;
+}
+
 /* Function vect_recog_ctz_ffs_pattern
 
    Try to find the following pattern:
@@ -6670,6 +6763,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },
+  { vect_recog_widen_abd_pattern, "widen_abd" },
   /* These must come after the double widening ones.  */
 };
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] Mid engine setup [SU]ABDL
  2023-06-30  8:26             ` Oluwatamilore Adebayo
@ 2023-06-30 11:28               ` Richard Sandiford
  0 siblings, 0 replies; 9+ messages in thread
From: Richard Sandiford @ 2023-06-30 11:28 UTC (permalink / raw)
  To: Oluwatamilore Adebayo; +Cc: gcc-patches, richard.guenther

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> 	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> 	Add IFN_VEC_WIDEN_ABD to the switch statement.
> 	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> 	* optabs.def (vec_widen_sabd_optab,
> 	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> 	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> 	vec_widen_uabd_optab,
> 	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> 	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> 	New optabs.
> 	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> 	to build a VEC_WIDEN_ABD call if the input precision is smaller
> 	than the precision of the output.
> 	(vect_recog_widen_abd_pattern): Should an ABD expression be
> 	found preceeding an extension, replace the two with a
> 	VEC_WIDEN_ABD.

Thanks.  Testing on an updated trunk shows that we need to check
INTEGRAL_TYPE_P…

> @@ -1703,6 +1736,66 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
>  				      &subtype);
>  }
>  
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> +   to IFN_VEC_WIDEN_ABD.  */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +			      tree *type_out)
> +{
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
> +    return NULL;
> +
> +  tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +
> +  tree in_type = TREE_TYPE (last_rhs);
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
> +      || !TYPE_UNSIGNED (in_type))
> +    return NULL;

…here to avoid new stricter testing for TYPE_PRECISION on VECTOR_TYPEs.
I've pushed the series with that change.

Thanks again for working on this.  It's a really nice improvement.

Richard

> +
> +  vect_unpromoted_value unprom;
> +  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
> +  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
> +    return NULL;
> +
> +  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> +  if (!abd_pattern_vinfo)
> +    return NULL;
> +
> +  abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
> +  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
> +  if (!abd_stmt
> +      || !gimple_call_internal_p (abd_stmt)
> +      || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +    return NULL;
> +
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  code_helper dummy_code;
> +  int dummy_int;
> +  auto_vec<tree> dummy_vec;
> +  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> +				       vectype_out, vectype_in,
> +				       &dummy_code, &dummy_code,
> +				       &dummy_int, &dummy_vec))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> +  *type_out = vectype_out;
> +
> +  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> +  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> +						      abd_oprnd0, abd_oprnd1);
> +  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
> +  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
> +  return widen_abd_stmt;
> +}
> +
>  /* Function vect_recog_ctz_ffs_pattern
>  
>     Try to find the following pattern:
> @@ -6670,6 +6763,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_mask_conversion_pattern, "mask_conversion" },
>    { vect_recog_widen_plus_pattern, "widen_plus" },
>    { vect_recog_widen_minus_pattern, "widen_minus" },
> +  { vect_recog_widen_abd_pattern, "widen_abd" },
>    /* These must come after the double widening ones.  */
>  };

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-06-30 11:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-26 15:34 [PATCH 1/2] Mid engine setup [SU]ABDL Oluwatamilore Adebayo
2023-06-26 20:50 ` Richard Sandiford
2023-06-27  7:46   ` Richard Sandiford
2023-06-28 15:07     ` Oluwatamilore Adebayo
2023-06-28 15:09       ` Oluwatamilore Adebayo
2023-06-29 10:14         ` Richard Sandiford
2023-06-30  8:23           ` Oluwatamilore Adebayo
2023-06-30  8:26             ` Oluwatamilore Adebayo
2023-06-30 11:28               ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).