public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Hongtao Liu <crazylht@gmail.com>
To: Segher Boessenkool <segher@kernel.crashing.org>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>, Andrew Pinski <pinskia@gmail.com>
Subject: Re: [PATCH][i386] Split not+broadcast+pand to broadcast+pandn.
Date: Wed, 26 May 2021 09:21:41 +0800	[thread overview]
Message-ID: <CAMZc-bzhP8CSujOHCvFCMBkhbUf9ndXCAi94+10+MDQoUSEOSw@mail.gmail.com> (raw)
In-Reply-To: <CAMZc-byUWU1W9NvH87tX2Z3GVQNhBvncU0mFv+Ff6aT3BFC0=g@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 903 bytes --]

Updated patch:
  The new patch simplifies (vec_duplicate (not (nonimmediate_operand)))
to (not (vec_duplicate (nonimmediate_operand))). This is not a
straightforward simplification; it just adds a tendency to pull the NOT
out of vec_duplicate.

  For i386, it enables the optimization below:

from
        notl    %edi
        vpbroadcastd    %edi, %xmm0
        vpand   %xmm1, %xmm0, %xmm0
to
        vpbroadcastd    %edi, %xmm0
        vpandn   %xmm1, %xmm0, %xmm0

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?
gcc/ChangeLog:

        PR target/100711
        * simplify-rtx.c (simplify_unary_operation_1):
        Simplify (vec_duplicate (not (nonimmediate_operand)))
        to (not (vec_duplicate (nonimmediate_operand))).

gcc/testsuite/ChangeLog:

        PR target/100711
        * gcc.target/i386/avx2-pr100711.c: New test.
        * gcc.target/i386/avx512bw-pr100711.c: New test.

[-- Attachment #2: 0001-Simplify-vec_duplicate-not-nonimmedaite_operand-to-n.patch --]
[-- Type: text/x-patch, Size: 5492 bytes --]

From aa36def1266538fdda02177be8dbf9433d7e959c Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 25 May 2021 17:17:32 +0800
Subject: [PATCH] Simplify (vec_duplicate (not (nonimmedaite_operand))) to (not
 (vec_duplicate (nonimmedaite_operand))).

This is not a straightforward simplification, just adding some
tendency to pull not out of vec_duplicate.

For i386, it will enable below opt

from
	notl    %edi
      	vpbroadcastd    %edi, %xmm0
      	vpand   %xmm1, %xmm0, %xmm0
to
      	vpbroadcastd    %edi, %xmm0
      	vpandn   %xmm1, %xmm0, %xmm0

gcc/ChangeLog:

	PR target/100711
	* simplify-rtx.c (simplify_unary_operation_1):
	Simplify (vec_duplicate (not (nonimmedaite_operand)))
	to (not (vec_duplicate (nonimmedaite_operand))).

gcc/testsuite/ChangeLog:

	PR target/100711
	* gcc.target/i386/avx2-pr100711.c: New test.
	* gcc.target/i386/avx512bw-pr100711.c: New test.
---
 gcc/simplify-rtx.c                            |  9 +++
 gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++++++++++++++++++
 .../gcc.target/i386/avx512bw-pr100711.c       | 48 ++++++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 04423bbd195..bb23183a8e0 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "selftest.h"
 #include "selftest-rtl.h"
 #include "rtx-vector-builder.h"
+#include "tm_p.h"
 
 /* Simplification and canonicalization of RTL.  */
 
@@ -1708,6 +1709,14 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
 #endif
       break;
 
+    /* Prefer (not (vec_duplicate (nonimmedaite_operand)))
+       to (vec_duplicate (not (nonimmedaite_operand))).  */
+    case VEC_DUPLICATE:
+      if (GET_CODE (op) == NOT
+	  && nonimmediate_operand (XEXP (op, 0), GET_MODE (op)))
+	return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
new file mode 100644
index 00000000000..5b144623873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 8 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+f1 (char a, v16qi c)
+{
+  char b = ~a;
+  return (__extension__(v16qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32qi
+f2 (char a, v32qi c)
+{
+  char b = ~a;
+  return (__extension__(v32qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8hi
+f3 (short a, v8hi c)
+{
+  short b = ~a;
+  return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v16hi
+f4 (short a, v16hi c)
+{
+  short b = ~a;
+  return (__extension__(v16hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v4si
+f5 (int a, v4si c)
+{
+  int b = ~a;
+  return (__extension__(v4si) {b, b, b, b}) & c;
+}
+
+v8si
+f6 (int a, v8si c)
+{
+  int b = ~a;
+  return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v2di
+f7 (long long a, v2di c)
+{
+  long long b = ~a;
+  return (__extension__(v2di) {b, b}) & c;
+}
+
+v4di
+f8 (long long a, v4di c)
+{
+  long long b = ~a;
+  return (__extension__(v4di) {b, b, b, b}) & c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
new file mode 100644
index 00000000000..f0a103d0bc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v64qi
+f1 (char a, v64qi c)
+{
+  char b = ~a;
+  return (__extension__(v64qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32hi
+f2 (short a, v32hi c)
+{
+  short b = ~a;
+  return (__extension__(v32hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v16si
+f3 (int a, v16si c)
+{
+  int b = ~a;
+  return (__extension__(v16si) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8di
+f4 (long long a, v8di c)
+{
+  long long b = ~a;
+  return (__extension__(v8di) {b, b, b, b, b, b, b, b}) & c;
+}
-- 
2.18.1


  reply	other threads:[~2021-05-26  1:17 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-25  5:49 Hongtao Liu
2021-05-25  6:11 ` Andrew Pinski
2021-05-25  6:23   ` Hongtao Liu
2021-05-25  6:29     ` Andrew Pinski
2021-05-25  6:34       ` Hongtao Liu
2021-05-26  1:21         ` Hongtao Liu [this message]
2021-05-26  4:12           ` Andrew Pinski
2021-05-26  5:17             ` Hongtao Liu
2021-06-01  8:32               ` Hongtao Liu
2021-06-01 13:54                 ` Segher Boessenkool
2021-06-01 14:02                 ` Segher Boessenkool
2021-06-02  5:39                   ` liuhongt
2021-06-02  5:39                     ` [PATCH] Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)) liuhongt
2021-06-02  7:07                       ` Richard Biener
2021-06-02 20:46                         ` Segher Boessenkool
2021-06-03 11:03                           ` Liu, Hongtao
2021-06-03 11:06                             ` Jakub Jelinek
2021-06-03 19:59                             ` Segher Boessenkool
2021-06-04  2:48                               ` Liu, Hongtao
2021-06-02  5:49                     ` Hongtao Liu
2021-06-02  5:41                   ` [PATCH] Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)) liuhongt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMZc-bzhP8CSujOHCvFCMBkhbUf9ndXCAi94+10+MDQoUSEOSw@mail.gmail.com \
    --to=crazylht@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=pinskia@gmail.com \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).