public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]middle-end: check if target can do extract first for early breaks [PR113199]
@ 2024-01-02 17:57 Tamar Christina
  2024-01-08 12:47 ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: Tamar Christina @ 2024-01-02 17:57 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther, jlaw

[-- Attachment #1: Type: text/plain, Size: 5759 bytes --]

Hi All,

I was generating the vector reverse mask without checking if the target
actually supported such an operation.

It also seems like more targets implement VEC_EXTRACT than permute on mask
registers.

So this adds a check for IFN_VEC_EXTRACT support when required and changes
the select first code to use it.

This is good for now since masks always come from whilelo.  But in the future
when masks can come from other sources we will need the old code back.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues with --enable-checking=release --enable-lto
--with-build-config=bootstrap-O3 --enable-checking=yes,rtl,extra.
tested on cross cc1 for amdgcn-amdhsa and issue fixed.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/113199
	* tree-vect-loop.cc (vectorizable_live_operation_1): Use
	IFN_VEC_EXTRACT.
	(vectorizable_live_operation): Check for IFN_VEC_EXTRACT support.

gcc/testsuite/ChangeLog:

	PR tree-optimization/113199
	* gcc.target/gcn/pr113199.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.target/gcn/pr113199.c b/gcc/testsuite/gcc.target/gcn/pr113199.c
new file mode 100644
index 0000000000000000000000000000000000000000..8a641e5536e80e207ca0163cac66c0f4f6ca93f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/pr113199.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+
+typedef long unsigned int size_t;
+typedef int wchar_t;
+struct tm
+{
+  int tm_mon;
+  int tm_year;
+};
+int abs (int);
+struct lc_time_T { const char *month[12]; };
+struct __locale_t * __get_current_locale (void) { }
+const struct lc_time_T * __get_time_locale (struct __locale_t *locale) { }
+const wchar_t * __ctloc (wchar_t *buf, const char *elem, size_t *len_ret) { return buf; }
+size_t
+__strftime (wchar_t *s, size_t maxsize, const wchar_t *format,
+     const struct tm *tim_p, struct __locale_t *locale)
+{
+  size_t count = 0;
+  const wchar_t *ctloc;
+  wchar_t ctlocbuf[256];
+  size_t i, ctloclen;
+  const struct lc_time_T *_CurrentTimeLocale = __get_time_locale (locale);
+    {
+      switch (*format)
+ {
+ case L'B':
+   (ctloc = __ctloc (ctlocbuf, _CurrentTimeLocale->month[tim_p->tm_mon], &ctloclen));
+   for (i = 0; i < ctloclen; i++)
+     {
+       if (count < maxsize - 1)
+  s[count++] = ctloc[i];
+       else
+  return 0;
+       {
+  int century = tim_p->tm_year >= 0
+    ? tim_p->tm_year / 100 + 1900 / 100
+    : abs (tim_p->tm_year + 1900) / 100;
+       }
+   }
+ }
+    }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 37f1be1101ffae779214056a0886411e0683e887..5aa92e67444e7aacf458fffa1428f1983c482374 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10648,36 +10648,18 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 				      &LOOP_VINFO_MASKS (loop_vinfo),
 				      1, vectype, 0);
       tree scalar_res;
+      gimple_seq_add_seq (&stmts, tem);
 
       /* For an inverted control flow with early breaks we want EXTRACT_FIRST
-	 instead of EXTRACT_LAST.  Emulate by reversing the vector and mask. */
+	 instead of EXTRACT_LAST.  For now since the mask always comes from a
+	 WHILELO we can get the first element ignoring the mask since CLZ of the
+	 mask will always be zero.  */
       if (restart_loop && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-	{
-	  /* First create the permuted mask.  */
-	  tree perm_mask = perm_mask_for_reverse (TREE_TYPE (mask));
-	  tree perm_dest = copy_ssa_name (mask);
-	  gimple *perm_stmt
-		= gimple_build_assign (perm_dest, VEC_PERM_EXPR, mask,
-				       mask, perm_mask);
-	  vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
-				       &gsi);
-	  mask = perm_dest;
-
-	  /* Then permute the vector contents.  */
-	  tree perm_elem = perm_mask_for_reverse (vectype);
-	  perm_dest = copy_ssa_name (vec_lhs_phi);
-	  perm_stmt
-		= gimple_build_assign (perm_dest, VEC_PERM_EXPR, vec_lhs_phi,
-				       vec_lhs_phi, perm_elem);
-	  vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
-				       &gsi);
-	  vec_lhs_phi = perm_dest;
-	}
-
-      gimple_seq_add_seq (&stmts, tem);
-
-      scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
-				 mask, vec_lhs_phi);
+	scalar_res = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
+				   vec_lhs_phi, bitstart);
+      else
+	scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
+				   mask, vec_lhs_phi);
 
       /* Convert the extracted vector element to the scalar type.  */
       new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
@@ -10852,9 +10834,25 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
 	      gcc_assert (ncopies == 1 && !slp_node);
 	      if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
 						  OPTIMIZE_FOR_SPEED))
-		vect_record_loop_mask (loop_vinfo,
-				       &LOOP_VINFO_MASKS (loop_vinfo),
-				       1, vectype, NULL);
+		{
+		  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
+		      && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+		      && !direct_internal_fn_supported_p (IFN_EXTRACT_LAST,
+							  vectype,
+							  OPTIMIZE_FOR_SPEED))
+		    {
+		      if (dump_enabled_p ())
+			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				"can't operate on partial vectors "
+				"because the target doesn't support extract "
+				"first reduction.\n");
+		      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+		    }
+		  else
+		    vect_record_loop_mask (loop_vinfo,
+					   &LOOP_VINFO_MASKS (loop_vinfo),
+					   1, vectype, NULL);
+		}
 	      else if (can_vec_extract_var_idx_p (
 			 TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype))))
 		vect_record_loop_len (loop_vinfo,




-- 

[-- Attachment #2: rb18119.patch --]
[-- Type: text/plain, Size: 4693 bytes --]

diff --git a/gcc/testsuite/gcc.target/gcn/pr113199.c b/gcc/testsuite/gcc.target/gcn/pr113199.c
new file mode 100644
index 0000000000000000000000000000000000000000..8a641e5536e80e207ca0163cac66c0f4f6ca93f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/pr113199.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+
+typedef long unsigned int size_t;
+typedef int wchar_t;
+struct tm
+{
+  int tm_mon;
+  int tm_year;
+};
+int abs (int);
+struct lc_time_T { const char *month[12]; };
+struct __locale_t * __get_current_locale (void) { }
+const struct lc_time_T * __get_time_locale (struct __locale_t *locale) { }
+const wchar_t * __ctloc (wchar_t *buf, const char *elem, size_t *len_ret) { return buf; }
+size_t
+__strftime (wchar_t *s, size_t maxsize, const wchar_t *format,
+     const struct tm *tim_p, struct __locale_t *locale)
+{
+  size_t count = 0;
+  const wchar_t *ctloc;
+  wchar_t ctlocbuf[256];
+  size_t i, ctloclen;
+  const struct lc_time_T *_CurrentTimeLocale = __get_time_locale (locale);
+    {
+      switch (*format)
+ {
+ case L'B':
+   (ctloc = __ctloc (ctlocbuf, _CurrentTimeLocale->month[tim_p->tm_mon], &ctloclen));
+   for (i = 0; i < ctloclen; i++)
+     {
+       if (count < maxsize - 1)
+  s[count++] = ctloc[i];
+       else
+  return 0;
+       {
+  int century = tim_p->tm_year >= 0
+    ? tim_p->tm_year / 100 + 1900 / 100
+    : abs (tim_p->tm_year + 1900) / 100;
+       }
+   }
+ }
+    }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 37f1be1101ffae779214056a0886411e0683e887..5aa92e67444e7aacf458fffa1428f1983c482374 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10648,36 +10648,18 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 				      &LOOP_VINFO_MASKS (loop_vinfo),
 				      1, vectype, 0);
       tree scalar_res;
+      gimple_seq_add_seq (&stmts, tem);
 
       /* For an inverted control flow with early breaks we want EXTRACT_FIRST
-	 instead of EXTRACT_LAST.  Emulate by reversing the vector and mask. */
+	 instead of EXTRACT_LAST.  For now since the mask always comes from a
+	 WHILELO we can get the first element ignoring the mask since CLZ of the
+	 mask will always be zero.  */
       if (restart_loop && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-	{
-	  /* First create the permuted mask.  */
-	  tree perm_mask = perm_mask_for_reverse (TREE_TYPE (mask));
-	  tree perm_dest = copy_ssa_name (mask);
-	  gimple *perm_stmt
-		= gimple_build_assign (perm_dest, VEC_PERM_EXPR, mask,
-				       mask, perm_mask);
-	  vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
-				       &gsi);
-	  mask = perm_dest;
-
-	  /* Then permute the vector contents.  */
-	  tree perm_elem = perm_mask_for_reverse (vectype);
-	  perm_dest = copy_ssa_name (vec_lhs_phi);
-	  perm_stmt
-		= gimple_build_assign (perm_dest, VEC_PERM_EXPR, vec_lhs_phi,
-				       vec_lhs_phi, perm_elem);
-	  vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
-				       &gsi);
-	  vec_lhs_phi = perm_dest;
-	}
-
-      gimple_seq_add_seq (&stmts, tem);
-
-      scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
-				 mask, vec_lhs_phi);
+	scalar_res = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
+				   vec_lhs_phi, bitstart);
+      else
+	scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
+				   mask, vec_lhs_phi);
 
       /* Convert the extracted vector element to the scalar type.  */
       new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
@@ -10852,9 +10834,25 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
 	      gcc_assert (ncopies == 1 && !slp_node);
 	      if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
 						  OPTIMIZE_FOR_SPEED))
-		vect_record_loop_mask (loop_vinfo,
-				       &LOOP_VINFO_MASKS (loop_vinfo),
-				       1, vectype, NULL);
+		{
+		  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
+		      && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+		      && !direct_internal_fn_supported_p (IFN_EXTRACT_LAST,
+							  vectype,
+							  OPTIMIZE_FOR_SPEED))
+		    {
+		      if (dump_enabled_p ())
+			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				"can't operate on partial vectors "
+				"because the target doesn't support extract "
+				"first reduction.\n");
+		      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+		    }
+		  else
+		    vect_record_loop_mask (loop_vinfo,
+					   &LOOP_VINFO_MASKS (loop_vinfo),
+					   1, vectype, NULL);
+		}
 	      else if (can_vec_extract_var_idx_p (
 			 TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype))))
 		vect_record_loop_len (loop_vinfo,




^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-01-09 16:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-02 17:57 [PATCH]middle-end: check if target can do extract first for early breaks [PR113199] Tamar Christina
2024-01-08 12:47 ` Richard Biener
2024-01-08 14:19   ` Tamar Christina
2024-01-09 11:45   ` Tamar Christina
2024-01-09 12:07     ` Richard Biener
2024-01-09 16:01       ` H.J. Lu
2024-01-09 16:06       ` Rainer Orth
2024-01-09 16:09         ` Tamar Christina
2024-01-09 16:12           ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).