From: Surya Kumari Jangala @ 2023-09-10 17:28 UTC
  To: Segher Boessenkool, GCC Patches; +Cc: Peter Bergner

swap: Fix incorrect lane extraction by vec_extract() [PR106770]

In the routine rs6000_analyze_swaps(), special handling of swappable
instructions is done even if the webs that contain the swappable
instructions are not optimized, i.e., even if the webs contain no
permuting load/store instructions with their associated register swap
instructions. Performing the special handling in such webs
unnecessarily adjusts the extracted lane of a vec_extract.
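
For example, a web may contain only the vector arithmetic feeding a
vec_extract and no permuting load/store at all; the extract lane must
then be left alone. A reduced sketch of that shape (illustrative only,
not the committed testcase below):

  #include <altivec.h>

  /* No permuting load/store in this web, so the lane passed to
     vec_extract must be used as-is.  */
  long long
  extract_lane1 (vector signed long long a, vector signed long long b)
  {
    vector signed long long d = vec_sub (a, b);
    return vec_extract (d, 1);
  }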

Another issue is that the existing code treats non-permuting
loads/stores as special swappables. Non-permuting loads/stores (that
have not yet been split into a permuting load/store and a swap) are
handled by converting them into a permuting load/store, which
effectively removes the swap. As a result, if special swappables were
handled only in webs containing permuting loads/stores, non-optimal
code would be generated for non-permuting loads/stores.

Hence, in this patch, all webs containing either permuting loads/
stores or non-permuting loads/stores are marked as requiring special
handling of swappables. Swaps associated with permuting loads/stores
are marked for removal, and non-permuting loads/stores are converted to
permuting loads/stores. Then the special swappables in the webs are
fixed up.
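
In outline, the reworked loop in rs6000_analyze_swaps() then looks
roughly like the following (a condensed sketch of the change below;
ROOT (i) stands for the unionfind_root () lookup used in the actual
code):

  /* Pass 1: mark swaps of permuting loads/stores for removal and
     convert non-permuting loads/stores, flagging the webs touched.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        if (!ROOT (i)->web_not_optimizable)
          {
            mark_swaps_for_removal (insn_entry, i);
            ROOT (i)->web_requires_special_handling = true;
          }
      }
    else if (insn_entry[i].is_swappable
             && non_permuting_mem_insn (insn_entry, i))
      {
        if (!ROOT (i)->web_not_optimizable)
          {
            handle_non_permuting_mem_insn (insn_entry, i);
            ROOT (i)->web_requires_special_handling = true;
          }
      }

  /* Pass 2: fix up the remaining swappables, but only in the webs
     flagged above.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].is_swappable && insn_entry[i].special_handling
        && !non_permuting_mem_insn (insn_entry, i)
        && ROOT (i)->web_requires_special_handling)
      handle_special_swappables (insn_entry, i);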

Another issue with unconditionally handling swappable instructions is
that it is incorrect to do so in webs where loads/stores on
quadword-aligned addresses are changed to lvx/stvx. Similarly, in webs
where swap(load(vector constant)) instructions are replaced with
load(swapped vector constant), the swappable instructions should not be
modified.

2023-09-10  Surya Kumari Jangala  <jskumari@linux.ibm.com>

gcc/
	PR rtl-optimization/106770
	* config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New
	function.
	(handle_non_permuting_mem_insn): New function.
	(rs6000_analyze_swaps): Handle swappable instructions only in
	certain webs.
	(web_requires_special_handling): New instance variable.
	(handle_special_swappables): Remove handling of non-permuting
	load/store instructions.

gcc/testsuite/
	PR rtl-optimization/106770
	* gcc.target/powerpc/pr106770.c: New test.
---

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 0388b9bd736..3a695aa1318 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base
   unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
+  /* Set if the swappable insns in the web represented by this entry
+     have to be fixed.  Swappable insns have to be fixed in:
+       - webs containing permuting loads/stores whose swap insns
+	 have been marked for removal;
+       - webs where non-permuting loads/stores have been converted
+	 to permuting loads/stores.  */
+  unsigned int web_requires_special_handling : 1;
   /* Set if this insn should be deleted.  */
   unsigned int will_delete : 1;
 };
@@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
       if (dump_file)
 	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
       break;
-    case SH_NOSWAP_LD:
-      /* Convert a non-permuting load to a permuting one.  */
-      permute_load (insn);
-      break;
-    case SH_NOSWAP_ST:
-      /* Convert a non-permuting store to a permuting one.  */
-      permute_store (insn);
-      break;
     case SH_EXTRACT:
       /* Change the lane on an extract operation.  */
       adjust_extract (insn);
@@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun)
   free (to_delete);
 }
 
+/* Return true if INSN_ENTRY[I] is a non-permuting load or store.  */
+static bool
+non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
+{
+  return (insn_entry[i].special_handling == SH_NOSWAP_LD
+	  || insn_entry[i].special_handling == SH_NOSWAP_ST);
+}
+
+/* Convert a non-permuting load/store insn to a permuting one.  */
+static void
+handle_non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
+{
+  rtx_insn *insn = insn_entry[i].insn;
+  if (insn_entry[i].special_handling == SH_NOSWAP_LD)
+    permute_load (insn);
+  else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
+    permute_store (insn);
+}
+
 /* Main entry point for this pass.  */
 unsigned int
 rs6000_analyze_swaps (function *fun)
@@ -2624,25 +2642,56 @@ rs6000_analyze_swaps (function *fun)
       dump_swap_insn_table (insn_entry);
     }
 
-  /* For each load and store in an optimizable web (which implies
-     the loads and stores are permuting), find the associated
-     register swaps and mark them for removal.  Due to various
-     optimizations we may mark the same swap more than once.  Also
-     perform special handling for swappable insns that require it.  */
+  /* There are two kinds of optimizations that can be performed on an
+     optimizable web:
+     1. Remove the register swaps associated with the permuting
+	loads/stores in the web.
+     2. Convert vanilla loads/stores (that have not yet been split
+	into a permuting load/store and a swap) into permuting
+	loads/stores (which effectively removes the swap).
+     In both cases, the swappable instructions in the web need
+     special handling to fix them up.  */
   for (i = 0; i < e; ++i)
+    /* For each permuting load/store in an optimizable web, find
+       the associated register swaps and mark them for removal.
+       Due to various optimizations we may mark the same swap more
+       than once.  */
     if ((insn_entry[i].is_load || insn_entry[i].is_store)
 	&& insn_entry[i].is_swap)
       {
 	swap_web_entry* root_entry
 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
 	if (!root_entry->web_not_optimizable)
-	  mark_swaps_for_removal (insn_entry, i);
+	  {
+	    mark_swaps_for_removal (insn_entry, i);
+	    root_entry->web_requires_special_handling = true;
+	  }
       }
-    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
+    /* Convert non-permuting loads/stores into permuting
+       loads/stores.  */
+    else if (insn_entry[i].is_swappable
+	     && non_permuting_mem_insn (insn_entry, i))
       {
 	swap_web_entry* root_entry
 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
 	if (!root_entry->web_not_optimizable)
+	  {
+	    handle_non_permuting_mem_insn (insn_entry, i);
+	    root_entry->web_requires_special_handling = true;
+	  }
+      }
+
+  /* Perform special handling for swappable insns that require it,
+     but only for insns in webs that have been marked above as
+     requiring it.  */
+  for (i = 0; i < e; ++i)
+    if (insn_entry[i].is_swappable && insn_entry[i].special_handling
+	&& !non_permuting_mem_insn (insn_entry, i))
+      {
+	swap_web_entry* root_entry
+	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+	if (root_entry->web_requires_special_handling)
 	  handle_special_swappables (insn_entry, i);
       }
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c b/gcc/testsuite/gcc.target/powerpc/pr106770.c
new file mode 100644
index 00000000000..11efe39abc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* The two xxpermdi instructions are generated by the two
+   calls to vec_promote ().  */
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
+
+/* Test case for PR106770.  */
+
+#include <altivec.h>
+
+int cmp2(double a, double b)
+{
+    vector double va = vec_promote(a, 1);
+    vector double vb = vec_promote(b, 1);
+    vector long long vlt = (vector long long)vec_cmplt(va, vb);
+    vector long long vgt = (vector long long)vec_cmplt(vb, va);
+    vector signed long long vr = vec_sub(vlt, vgt);
+
+    return vec_extract(vr, 1);
+}

