public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438)
@ 2023-07-04 18:25 simonaytes.yan
  2023-07-05  1:26 ` Hongtao Liu
  0 siblings, 1 reply; 2+ messages in thread
From: simonaytes.yan @ 2023-07-04 18:25 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 85 bytes --]

PR target/110438 asks for a dependency-breaking PXOR to be emitted before
VPTERNLOG when generating an all-ones vector. This patch implements that.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: Generating-all-ones-zmm-needs-dep-breaking-pxor-befo.patch --]
[-- Type: text/x-diff; name=Generating-all-ones-zmm-needs-dep-breaking-pxor-befo.patch, Size: 4038 bytes --]

From 815779936d0ca213b4c9ec798ed6acf8179fc2e7 Mon Sep 17 00:00:00 2001
From: Yan Simonaytes <simonaytes.yan@ispras.ru>
Date: Tue, 4 Jul 2023 21:11:04 +0300
Subject: [PATCH] Generating all-ones zmm needs dep-breaking pxor before
 ternlog

	PR target/110438

gcc/ChangeLog:

	* config/i386/i386.cc (standard_sse_constant_opcode): Emit PXOR
	before VPTERNLOG.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110438-1.c: New test.
	* gcc.target/i386/pr110438-2.c: New test.
	* gcc.target/i386/pr110438-3.c: New test.
---
 gcc/config/i386/i386.cc                    | 23 +++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr110438-1.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-2.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-3.c | 12 +++++++++++
 4 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 8989985700a..89e0072caa1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5329,6 +5329,13 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	case MODE_V8DF:
 	case MODE_V16SF:
 	  gcc_assert (TARGET_AVX512F);
+	  if (optimize_insn_for_speed_p ())
+	    {
+	      if (TARGET_AVX512VL)
+		output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+	      else
+		output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+	    }
 	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
 
 	case MODE_OI:
@@ -5344,10 +5351,20 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	    return (TARGET_AVX
 		    ? "vpcmpeqd\t%0, %0, %0"
 		    : "pcmpeqd\t%0, %0");
-	  else if (TARGET_AVX512VL)
-	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
 	  else
-	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    { 
+	      if (optimize_insn_for_speed_p ())
+		{
+		  if (TARGET_AVX512VL)
+		    output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+		  else
+		    output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+		}
+	      if (TARGET_AVX512VL)
+		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
+	      else
+		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    }
 
 	default:
 	  gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-1.c b/gcc/testsuite/gcc.target/i386/pr110438-1.c
new file mode 100644
index 00000000000..0c5f4470e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-1.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%z" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-2.c b/gcc/testsuite/gcc.target/i386/pr110438-2.c
new file mode 100644
index 00000000000..14770a972e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-2.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%x" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-3.c b/gcc/testsuite/gcc.target/i386/pr110438-3.c
new file mode 100644
index 00000000000..fb07997839c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-3.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -Os" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler-not "vpxor" } }*/
-- 
2.34.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438)
  2023-07-04 18:25 Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438) simonaytes.yan
@ 2023-07-05  1:26 ` Hongtao Liu
  0 siblings, 0 replies; 2+ messages in thread
From: Hongtao Liu @ 2023-07-05  1:26 UTC (permalink / raw)
  To: simonaytes.yan; +Cc: gcc-patches

On Wed, Jul 5, 2023 at 2:25 AM simonaytes.yan--- via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> PR target/110438 requests to emit PXOR before VPTERNLOG. This patch
> implements that.

I prefer using UNSPEC_INSN_FALSE_DEP like we did for lzcnt/tzcnt/popcnt.

i.e.
18866 ; False dependency happens when destination is only updated by tzcnt,
18867 ; lzcnt or popcnt.  There is no false dependency when destination is
18868 ; also used in source.
18869 (define_insn "*popcountsi2_zext_falsedep"
18870   [(set (match_operand:DI 0 "register_operand" "=r")
18871         (and:DI
18872           (subreg:DI
18873             (popcount:SI
18874               (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
18875           (const_int 63)))
18876    (unspec [(match_operand:DI 2 "register_operand" "0")]
18877            UNSPEC_INSN_FALSE_DEP)
18878    (clobber (reg:CC FLAGS_REG))]
18879   "TARGET_POPCNT && TARGET_64BIT"


BTW, I also posted a patch for this issue at
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623523.html

-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-05  1:27 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-04 18:25 Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438) simonaytes.yan
2023-07-05  1:26 ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).