public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.
@ 2020-10-13  8:40 Hongtao Liu
  2020-10-13 19:59 ` Segher Boessenkool
  0 siblings, 1 reply; 20+ messages in thread
From: Hongtao Liu @ 2020-10-13  8:40 UTC (permalink / raw)
  To: GCC Patches, Segher Boessenkool

[-- Attachment #1: Type: text/plain, Size: 469 bytes --]

Hi:
  An rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
                   (parallel [(const_int 0) (const_int 1)]))
  can be simplified to inner.

  Bootstrapped OK; regression tests on the i386 backend pass.

gcc/ChangeLog
        PR rtl-optimization/97249
        * simplify-rtx.c (simplify_binary_operation_1): Simplify
        vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

        * gcc.target/i386/pr97249-1.c: New test.

-- 
BR,
Hongtao

[-- Attachment #2: 0001-Simplify-vec_select-of-paradoxical-subreg.patch --]
[-- Type: text/x-patch, Size: 3180 bytes --]

From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.

An rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
		   (parallel [(const_int 0) (const_int 1)]))
can be simplified to inner.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c                        | 27 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		    return subop1;
 		}
 	    }
+
+	  /* For cases like
+	     (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+			      (parallel [(const_int 0) (const_int 1)])).
+	     return inner directly.  */
+	  if (GET_CODE (trueop0) == SUBREG
+	      && paradoxical_subreg_p (trueop0)
+	      && mode == GET_MODE (XEXP (trueop0, 0))
+	      && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+	      && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+	      && l0 % l1 == 0)
+	    {
+	      gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+	      unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+	      unsigned HOST_WIDE_INT sel = 0;
+	      int i = 0;
+	      for (;i != l1; i++)
+		{
+		  rtx j = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (j))
+		    break;
+		  sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+		}
+	      /* ??? Need to simplify XEXP (trueop0, 0) here.  */
+	      if (sel == expect)
+		return XEXP (trueop0, 0);
+	    }
 	}
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++)
+      p3[i] = (long long)p1[i] + (long long)p2[i];
+     return;
+}
-- 
2.18.1


[-- Attachment #3: 0001-Simplify-vec_select-of-paradoxical-subreg.patch --]
[-- Type: text/x-patch, Size: 3180 bytes --]

From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.

An rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
		   (parallel [(const_int 0) (const_int 1)]))
can be simplified to inner.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c                        | 27 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		    return subop1;
 		}
 	    }
+
+	  /* For cases like
+	     (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+			      (parallel [(const_int 0) (const_int 1)])).
+	     return inner directly.  */
+	  if (GET_CODE (trueop0) == SUBREG
+	      && paradoxical_subreg_p (trueop0)
+	      && mode == GET_MODE (XEXP (trueop0, 0))
+	      && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+	      && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+	      && l0 % l1 == 0)
+	    {
+	      gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+	      unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+	      unsigned HOST_WIDE_INT sel = 0;
+	      int i = 0;
+	      for (;i != l1; i++)
+		{
+		  rtx j = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (j))
+		    break;
+		  sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+		}
+	      /* ??? Need to simplify XEXP (trueop0, 0) here.  */
+	      if (sel == expect)
+		return XEXP (trueop0, 0);
+	    }
 	}
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++)
+      p3[i] = (long long)p1[i] + (long long)p2[i];
+     return;
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-10-22  3:31 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-13  8:40 [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg Hongtao Liu
2020-10-13 19:59 ` Segher Boessenkool
2020-10-14  5:43   ` Hongtao Liu
2020-10-14 17:35     ` Segher Boessenkool
2020-10-14 17:55       ` Richard Biener
2020-10-14 19:23         ` Segher Boessenkool
2020-10-15  8:14       ` Hongtao Liu
2020-10-15  9:58         ` Hongtao Liu
2020-10-15 12:38           ` Richard Sandiford
2020-10-19  5:18             ` Hongtao Liu
2020-10-19 15:31               ` Richard Sandiford
2020-10-20  3:20                 ` Hongtao Liu
2020-10-20 16:42                   ` Richard Sandiford
2020-10-21  2:43                     ` Hongtao Liu
2020-10-20 21:05                   ` Segher Boessenkool
2020-10-21  3:17                     ` Hongtao Liu
2020-10-21 15:43                       ` Richard Sandiford
2020-10-21 16:34                         ` Segher Boessenkool
2020-10-22  3:33                           ` Hongtao Liu
2020-10-20 20:43         ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).