public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]middle-end: Implement conditonal store vectorizer pattern [PR115531]
@ 2024-06-25 12:18 Tamar Christina
  2024-06-26 13:22 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2024-06-25 12:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther, jlaw

[-- Attachment #1: Type: text/plain, Size: 13680 bytes --]

Hi All,

This adds a conditional store optimization for the vectorizer as a pattern.
The vectorizer already supports modifying memory accesses because of the pattern
based gather/scatter recognition.

Doing it in the vectorizer allows us to still keep the ability to vectorize such
loops for architectures that don't have MASK_STORE support, whereas doing this
in ifcvt makes us commit to MASK_STORE.

Concretely for this loop:

void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
{
  if (stride <= 1)
    return;

  for (int i = 0; i < n; i++)
    {
      int res = c[i];
      int t = b[i+stride];
      if (a[i] != 0)
        res = t;
      c[i] = res;
    }
}

today we generate:

.L3:
        ld1b    z29.s, p7/z, [x0, x5]
        ld1w    z31.s, p7/z, [x2, x5, lsl 2]
        ld1w    z30.s, p7/z, [x1, x5, lsl 2]
        cmpne   p15.b, p6/z, z29.b, #0
        sel     z30.s, p15, z30.s, z31.s
        st1w    z30.s, p7, [x2, x5, lsl 2]
        add     x5, x5, x4
        whilelo p7.s, w5, w3
        b.any   .L3

which in gimple is:

  vect_res_18.9_68 = .MASK_LOAD (vectp_c.7_65, 32B, loop_mask_67);
  vect_t_20.12_74 = .MASK_LOAD (vectp.10_72, 32B, loop_mask_67);
  vect__9.15_77 = .MASK_LOAD (vectp_a.13_75, 8B, loop_mask_67);
  mask__34.16_79 = vect__9.15_77 != { 0, ... };
  vect_res_11.17_80 = VEC_COND_EXPR <mask__34.16_79, vect_t_20.12_74, vect_res_18.9_68>;
  .MASK_STORE (vectp_c.18_81, 32B, loop_mask_67, vect_res_11.17_80);

A MASK_STORE is already conditional, so there's no need to perform the load of
the old values and the VEC_COND_EXPR.  This patch makes it so we generate:

  vect_res_18.9_68 = .MASK_LOAD (vectp_c.7_65, 32B, loop_mask_67);
  vect__9.15_77 = .MASK_LOAD (vectp_a.13_75, 8B, loop_mask_67);
  mask__34.16_79 = vect__9.15_77 != { 0, ... };
  .MASK_STORE (vectp_c.18_81, 32B, mask__34.16_79, vect_res_18.9_68);

which generates:

.L3:
        ld1b    z30.s, p7/z, [x0, x5]
        ld1w    z31.s, p7/z, [x1, x5, lsl 2]
        cmpne   p7.b, p7/z, z30.b, #0
        st1w    z31.s, p7, [x2, x5, lsl 2]
        add     x5, x5, x4
        whilelo p7.s, w5, w3
        b.any   .L3

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/115531
	* tree-vect-patterns.cc (vect_cond_store_pattern_same_ref): New.
	(vect_recog_cond_store_pattern): New.
	(vect_vect_recog_func_ptrs): Use it.

gcc/testsuite/ChangeLog:

	PR tree-optimization/115531
	* gcc.dg/vect/vect-conditional_store_1.c: New test.
	* gcc.dg/vect/vect-conditional_store_2.c: New test.
	* gcc.dg/vect/vect-conditional_store_3.c: New test.
	* gcc.dg/vect/vect-conditional_store_4.c: New test.

---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..3884a3c3d0a2dc2258097348c75bb7c0b3b37c72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        res = t;
+      c[i] = res;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc965a244f147c199b1726e5f6b44229539cd225
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo2 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab6889f967b330a652917925c2748b16af59b9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] >= 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..3bfe2f81dc2d47096aa23529d43263be52cd422c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo4 (signed char *restrict a, int *restrict b, int *restrict c, int *restrict d, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res1 = c[i];
+      int res2 = d[i];
+      int t = b[i+stride];
+      if (a[i] > 0)
+        t = res1;
+      else if (a[i] < 0)
+        t = res2 * 2;
+
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index cef901808eb97c0c92b51da20535ea7f397b4742..f0da6c932f48d2992a501d0ced3efc8924912c77 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6397,6 +6397,177 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Helper method of vect_recog_cond_store_pattern,  checks to see if COND_ARG
+   is points to a load statement that reads the same data as that of
+   STORE_VINFO.  */
+
+static bool
+vect_cond_store_pattern_same_ref (loop_vec_info loop_vinfo,
+				  stmt_vec_info store_vinfo, tree cond_arg)
+{
+  stmt_vec_info load_stmt_vinfo = loop_vinfo->lookup_def (cond_arg);
+  if (!load_stmt_vinfo
+      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
+      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
+      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
+			  STMT_VINFO_DATA_REF (load_stmt_vinfo)))
+    return false;
+
+  return true;
+}
+
+/* Function vect_recog_cond_store_pattern
+
+   Try to find the following pattern:
+
+   x = *_3;
+   c = a CMP b;
+   y = c ? t_20 : x;
+   *_3 = y;
+
+   where the store of _3 happens on a conditional select on a value loaded
+   from the same location.  In such case we can elide the initial load if
+   MASK_STORE is supported and instead only conditionally write out the result.
+
+   The pattern produces for the above:
+
+   c = a CMP b;
+   .MASK_STORE (_3, c, t_20)
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins.  In the
+   example, when this function is called with _3 then the search begins.
+
+   Output:
+
+   * TYPE_OUT: The type of the output  of this pattern.
+
+   * Return value: A new stmt that will be used to replace the sequence.  */
+
+static gimple *
+vect_recog_cond_store_pattern (vec_info *vinfo,
+			       stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  if (!loop_vinfo)
+    return NULL;
+
+  gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  /* Needs to be a gimple store where we have DR info for.  */
+  if (!STMT_VINFO_DATA_REF (stmt_vinfo)
+      || !DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_vinfo))
+      || !gimple_store_p (store_stmt))
+    return NULL;
+
+  tree st_rhs = gimple_assign_rhs1 (store_stmt);
+  tree st_lhs = gimple_assign_lhs (store_stmt);
+
+  if (TREE_CODE (st_rhs) != SSA_NAME)
+    return NULL;
+
+  gassign *cond_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (st_rhs));
+  if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
+    return NULL;
+
+  /* Check if the else value matches the original loaded one.  */
+  bool invert = false;
+  tree cmp_ls = gimple_arg (cond_stmt, 0);
+  tree cond_arg1 = gimple_arg (cond_stmt, 1);
+  tree cond_arg2 = gimple_arg (cond_stmt, 2);
+
+  if (!vect_cond_store_pattern_same_ref (loop_vinfo, stmt_vinfo, cond_arg2)
+      && !(invert = vect_cond_store_pattern_same_ref (loop_vinfo, stmt_vinfo,
+						      cond_arg1)))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
+
+  tree scalar_type = TREE_TYPE (st_rhs);
+  if (VECTOR_TYPE_P (scalar_type))
+    return NULL;
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  machine_mode mask_mode;
+  machine_mode vecmode = TYPE_MODE (vectype);
+  if (!targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
+      || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
+    return NULL;
+
+  /* Convert the mask to the right form.  */
+  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
+  tree cookie = build_int_cst (build_pointer_type (scalar_type),
+			       TYPE_ALIGN (scalar_type));
+  tree base = TREE_OPERAND (st_lhs, 0);
+  tree cond_store_arg = cond_arg1;
+
+  /* If we have to invert the condition, i.e. use the true argument rather than
+     the false argument, we should check whether we can just invert the
+     comparison or if we have to negate the result.  */
+  if (invert)
+    {
+      gimple *cond = SSA_NAME_DEF_STMT (cmp_ls);
+      /* We need to use the false parameter of the conditional select.  */
+      cond_store_arg = cond_arg2;
+      tree_code new_code = ERROR_MARK;
+      tree mask_vec_type, itype;
+      gassign *conv;
+      tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+
+      if (is_gimple_assign (cond)
+	  && TREE_CODE_CLASS (gimple_assign_rhs_code (cond)) == tcc_comparison)
+	{
+	  tree_code cond_code = gimple_assign_rhs_code (cond);
+	  tree cond_expr0 = gimple_assign_rhs1 (cond);
+	  tree cond_expr1 = gimple_assign_rhs2 (cond);
+
+	  /* We have to invert the comparison, see if we can flip it.  */
+	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
+	  new_code = invert_tree_comparison (cond_code, honor_nans);
+	  if (new_code != ERROR_MARK)
+	    {
+	      itype = TREE_TYPE(cond_expr0);
+	      conv = gimple_build_assign (var, new_code, cond_expr0,
+					  cond_expr1);
+	    }
+	}
+
+      if (new_code == ERROR_MARK)
+	{
+	  /* We couldn't flip the condition, so invert the mask instead.  */
+	  itype = TREE_TYPE (cmp_ls);
+	  conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
+				      build_int_cst (itype, 1));
+	}
+
+      mask_vec_type = get_mask_type_for_scalar_type (loop_vinfo, itype);
+      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
+      /* Then prepare the boolean mask as the mask conversion pattern
+	 won't hit on the pattern statement.  */
+      cmp_ls = build_mask_conversion (vinfo, var, gs_vectype, stmt_vinfo);
+    }
+
+  tree mask = vect_convert_mask_for_vectype (cmp_ls, gs_vectype, stmt_vinfo,
+					     loop_vinfo);
+  gcall *call
+    = gimple_build_call_internal (IFN_MASK_STORE, 4, base, cookie, mask,
+				  cond_store_arg);
+  gimple_set_location (call, gimple_location (store_stmt));
+  gimple_set_lhs (call, make_ssa_name (integer_type_node));
+
+  /* Copy across relevant vectorization info and associate DR with the
+     new pattern statement instead of the original statement.  */
+  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
+  loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
+
+  *type_out = vectype;
+  return call;
+}
+
 /* Return true if TYPE is a non-boolean integer type.  These are the types
    that we want to consider for narrowing.  */
 
@@ -7061,6 +7232,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
      of mask conversion that are needed for gather and scatter
      internal functions.  */
   { vect_recog_gather_scatter_pattern, "gather_scatter" },
+  { vect_recog_cond_store_pattern, "cond_store" },
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },




-- 

[-- Attachment #2: rb18576.patch --]
[-- Type: text/x-diff, Size: 10977 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..3884a3c3d0a2dc2258097348c75bb7c0b3b37c72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        res = t;
+      c[i] = res;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc965a244f147c199b1726e5f6b44229539cd225
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo2 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab6889f967b330a652917925c2748b16af59b9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] >= 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..3bfe2f81dc2d47096aa23529d43263be52cd422c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo4 (signed char *restrict a, int *restrict b, int *restrict c, int *restrict d, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res1 = c[i];
+      int res2 = d[i];
+      int t = b[i+stride];
+      if (a[i] > 0)
+        t = res1;
+      else if (a[i] < 0)
+        t = res2 * 2;
+
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index cef901808eb97c0c92b51da20535ea7f397b4742..f0da6c932f48d2992a501d0ced3efc8924912c77 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6397,6 +6397,177 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Helper method of vect_recog_cond_store_pattern,  checks to see if COND_ARG
+   is points to a load statement that reads the same data as that of
+   STORE_VINFO.  */
+
+static bool
+vect_cond_store_pattern_same_ref (loop_vec_info loop_vinfo,
+				  stmt_vec_info store_vinfo, tree cond_arg)
+{
+  stmt_vec_info load_stmt_vinfo = loop_vinfo->lookup_def (cond_arg);
+  if (!load_stmt_vinfo
+      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
+      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
+      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
+			  STMT_VINFO_DATA_REF (load_stmt_vinfo)))
+    return false;
+
+  return true;
+}
+
+/* Function vect_recog_cond_store_pattern
+
+   Try to find the following pattern:
+
+   x = *_3;
+   c = a CMP b;
+   y = c ? t_20 : x;
+   *_3 = y;
+
+   where the store of _3 happens on a conditional select on a value loaded
+   from the same location.  In such case we can elide the initial load if
+   MASK_STORE is supported and instead only conditionally write out the result.
+
+   The pattern produces for the above:
+
+   c = a CMP b;
+   .MASK_STORE (_3, c, t_20)
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins.  In the
+   example, when this function is called with _3 then the search begins.
+
+   Output:
+
+   * TYPE_OUT: The type of the output  of this pattern.
+
+   * Return value: A new stmt that will be used to replace the sequence.  */
+
+static gimple *
+vect_recog_cond_store_pattern (vec_info *vinfo,
+			       stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  if (!loop_vinfo)
+    return NULL;
+
+  gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  /* Needs to be a gimple store where we have DR info for.  */
+  if (!STMT_VINFO_DATA_REF (stmt_vinfo)
+      || !DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_vinfo))
+      || !gimple_store_p (store_stmt))
+    return NULL;
+
+  tree st_rhs = gimple_assign_rhs1 (store_stmt);
+  tree st_lhs = gimple_assign_lhs (store_stmt);
+
+  if (TREE_CODE (st_rhs) != SSA_NAME)
+    return NULL;
+
+  gassign *cond_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (st_rhs));
+  if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
+    return NULL;
+
+  /* Check if the else value matches the original loaded one.  */
+  bool invert = false;
+  tree cmp_ls = gimple_arg (cond_stmt, 0);
+  tree cond_arg1 = gimple_arg (cond_stmt, 1);
+  tree cond_arg2 = gimple_arg (cond_stmt, 2);
+
+  if (!vect_cond_store_pattern_same_ref (loop_vinfo, stmt_vinfo, cond_arg2)
+      && !(invert = vect_cond_store_pattern_same_ref (loop_vinfo, stmt_vinfo,
+						      cond_arg1)))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
+
+  tree scalar_type = TREE_TYPE (st_rhs);
+  if (VECTOR_TYPE_P (scalar_type))
+    return NULL;
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  machine_mode mask_mode;
+  machine_mode vecmode = TYPE_MODE (vectype);
+  if (!targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
+      || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
+    return NULL;
+
+  /* Convert the mask to the right form.  */
+  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
+  tree cookie = build_int_cst (build_pointer_type (scalar_type),
+			       TYPE_ALIGN (scalar_type));
+  tree base = TREE_OPERAND (st_lhs, 0);
+  tree cond_store_arg = cond_arg1;
+
+  /* If we have to invert the condition, i.e. use the true argument rather than
+     the false argument, we should check whether we can just invert the
+     comparison or if we have to negate the result.  */
+  if (invert)
+    {
+      gimple *cond = SSA_NAME_DEF_STMT (cmp_ls);
+      /* We need to use the false parameter of the conditional select.  */
+      cond_store_arg = cond_arg2;
+      tree_code new_code = ERROR_MARK;
+      tree mask_vec_type, itype;
+      gassign *conv;
+      tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+
+      if (is_gimple_assign (cond)
+	  && TREE_CODE_CLASS (gimple_assign_rhs_code (cond)) == tcc_comparison)
+	{
+	  tree_code cond_code = gimple_assign_rhs_code (cond);
+	  tree cond_expr0 = gimple_assign_rhs1 (cond);
+	  tree cond_expr1 = gimple_assign_rhs2 (cond);
+
+	  /* We have to invert the comparison, see if we can flip it.  */
+	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
+	  new_code = invert_tree_comparison (cond_code, honor_nans);
+	  if (new_code != ERROR_MARK)
+	    {
+	      itype = TREE_TYPE(cond_expr0);
+	      conv = gimple_build_assign (var, new_code, cond_expr0,
+					  cond_expr1);
+	    }
+	}
+
+      if (new_code == ERROR_MARK)
+	{
+	  /* We couldn't flip the condition, so invert the mask instead.  */
+	  itype = TREE_TYPE (cmp_ls);
+	  conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
+				      build_int_cst (itype, 1));
+	}
+
+      mask_vec_type = get_mask_type_for_scalar_type (loop_vinfo, itype);
+      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
+      /* Then prepare the boolean mask as the mask conversion pattern
+	 won't hit on the pattern statement.  */
+      cmp_ls = build_mask_conversion (vinfo, var, gs_vectype, stmt_vinfo);
+    }
+
+  tree mask = vect_convert_mask_for_vectype (cmp_ls, gs_vectype, stmt_vinfo,
+					     loop_vinfo);
+  gcall *call
+    = gimple_build_call_internal (IFN_MASK_STORE, 4, base, cookie, mask,
+				  cond_store_arg);
+  gimple_set_location (call, gimple_location (store_stmt));
+  gimple_set_lhs (call, make_ssa_name (integer_type_node));
+
+  /* Copy across relevant vectorization info and associate DR with the
+     new pattern statement instead of the original statement.  */
+  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
+  loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
+
+  *type_out = vectype;
+  return call;
+}
+
 /* Return true if TYPE is a non-boolean integer type.  These are the types
    that we want to consider for narrowing.  */
 
@@ -7061,6 +7232,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
      of mask conversion that are needed for gather and scatter
      internal functions.  */
   { vect_recog_gather_scatter_pattern, "gather_scatter" },
+  { vect_recog_cond_store_pattern, "cond_store" },
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-06-26 13:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-25 12:18 [PATCH]middle-end: Implement conditonal store vectorizer pattern [PR115531] Tamar Christina
2024-06-26 13:22 ` Richard Biener
2024-06-26 13:46   ` Tamar Christina
2024-06-26 13:59     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).