public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-2999] i386: eliminate redundant operands of VPTERNLOG
@ 2023-08-04 16:44 Alexander Monakov
  0 siblings, 0 replies; only message in thread
From: Alexander Monakov @ 2023-08-04 16:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:567d06bb357a39ece865cef67ada44124f227e45

commit r14-2999-g567d06bb357a39ece865cef67ada44124f227e45
Author: Yan Simonaytes <simonaytes.yan@ispras.ru>
Date:   Tue Jul 25 20:43:19 2023 +0300

    i386: eliminate redundant operands of VPTERNLOG
    
    As mentioned in PR 110202, GCC may be presented with input where the
    control word of the VPTERNLOG intrinsic implies that some of its operands
    do not affect the result.  In that case, we can eliminate redundant
    operands of the instruction by substituting any other operand in their
    place.  This removes false dependencies.
    
    For instance, instead of (252 = 0xfc = _MM_TERNLOG_A | _MM_TERNLOG_B)
    
            vpternlogq      $252, %zmm2, %zmm1, %zmm0
    
    emit
    
            vpternlogq      $252, %zmm0, %zmm1, %zmm0
    
    When VPTERNLOG is invariant w.r.t. the first and second operands, and the
    third operand is memory, load memory into the output operand first, i.e.
    instead of (85 = 0x55 = ~_MM_TERNLOG_C)
    
            vpternlogq      $85, (%rdi), %zmm1, %zmm0
    
    emit
    
            vmovdqa64       (%rdi), %zmm0
            vpternlogq      $85, %zmm0, %zmm0, %zmm0
    
    gcc/ChangeLog:
    
            PR target/110202
            * config/i386/i386-protos.h
            (vpternlog_redundant_operand_mask): Declare.
            (substitute_vpternlog_operands): Declare.
            * config/i386/i386.cc
            (vpternlog_redundant_operand_mask): New helper.
            (substitute_vpternlog_operands): New function.  Use them...
            * config/i386/sse.md: ... here in new VPTERNLOG define_splits.
    
    gcc/testsuite/ChangeLog:
    
            PR target/110202
            * gcc.target/i386/invariant-ternlog-1.c: New test.
            * gcc.target/i386/invariant-ternlog-2.c: New test.

Diff:
---
 gcc/config/i386/i386-protos.h                      |  3 ++
 gcc/config/i386/i386.cc                            | 43 ++++++++++++++++++++++
 gcc/config/i386/sse.md                             | 42 +++++++++++++++++++++
 .../gcc.target/i386/invariant-ternlog-1.c          | 21 +++++++++++
 .../gcc.target/i386/invariant-ternlog-2.c          | 12 ++++++
 5 files changed, 121 insertions(+)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27fe73ca65c..e547ee64587 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -70,6 +70,9 @@ extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
 extern int avx_vpermilp_parallel (rtx par, machine_mode mode);
 extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
 
+extern int vpternlog_redundant_operand_mask (rtx[]);
+extern void substitute_vpternlog_operands (rtx[]);
+
 extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
 extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
 				       rtx, rtx, rtx, rtx, bool);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index eabc70011ea..8cd26eb54fa 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -19451,6 +19451,49 @@ avx_vperm2f128_parallel (rtx par, machine_mode mode)
   return mask + 1;
 }
 \f
+/* Return a mask of VPTERNLOG operands that do not affect output.  */
+
+int
+vpternlog_redundant_operand_mask (rtx *operands)
+{
+  int mask = 0;
+  int imm8 = INTVAL (operands[4]); /* A CONST_INT in the range 0..255.  */
+
+  if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
+    mask |= 1; /* High and low nibbles match: operand 1 is ignored.  */
+  if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
+    mask |= 2; /* Bit pairs two apart match: operand 2 is ignored.  */
+  if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
+    mask |= 4; /* Odd and even bits match: operand 3 is ignored.  */
+
+  return mask;
+}
+
+/* Eliminate false dependencies on operands that do not affect output
+   by substituting other operands of a VPTERNLOG.  */
+
+void
+substitute_vpternlog_operands (rtx *operands)
+{
+  const int redundant = vpternlog_redundant_operand_mask (operands);
+  const bool drop1 = (redundant & 1) != 0, drop2 = (redundant & 2) != 0;
+
+  /* Order matters: the second copy must observe the first one.  */
+  if (drop1)
+    operands[1] = operands[2];
+  if (drop2)
+    operands[2] = operands[1];
+
+  if (redundant & 4)
+    operands[3] = operands[1]; /* The third operand is redundant.  */
+  else if (REG_P (operands[3]))
+    {
+      /* Operand 3 is really used; let the ignored inputs reuse it.  */
+      operands[1] = drop1 ? operands[3] : operands[1];
+      operands[2] = drop2 ? operands[3] : operands[2];
+    }
+}
+\f
 /* Return a register priority for hard reg REGNO.  */
 static int
 ix86_register_priority (int hard_regno)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f1712b001f9..7e2aa3f995c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12695,6 +12695,48 @@
 		      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
 		      (const_string "*")))])
 
+;; When the immediate makes VPTERNLOG ignore its first and second operands
+;; (redundant-operand mask == 3) and the third operand is in memory, load it
+;; into the destination first, then use the destination for all three inputs.
+(define_split
+  [(set (match_operand:V 0 "register_operand")
+	(unspec:V
+	  [(match_operand:V 1 "register_operand")
+	   (match_operand:V 2 "register_operand")
+	   (match_operand:V 3 "memory_operand")
+	   (match_operand:SI 4 "const_0_to_255_operand")]
+	  UNSPEC_VTERNLOG))]
+  "!reload_completed && vpternlog_redundant_operand_mask (operands) == 3"
+  [(set (match_dup 0)
+	(match_dup 3))  ; first insn: load the memory operand into the output
+   (set (match_dup 0)
+	(unspec:V
+	  [(match_dup 0)  ; ignored input, replaced by the output register
+	   (match_dup 0)  ; ignored input, replaced by the output register
+	   (match_dup 0)  ; the value just loaded from operand 3
+	   (match_dup 4)]
+	  UNSPEC_VTERNLOG))])
+
+;; When the immediate makes VPTERNLOG ignore any of its input operands, re-emit
+;; it with other operands substituted for them (see also the split above).
+(define_split
+  [(set (match_operand:V 0 "register_operand")
+	(unspec:V
+	  [(match_operand:V 1 "register_operand")
+	   (match_operand:V 2 "register_operand")
+	   (match_operand:V 3 "nonimmediate_operand")
+	   (match_operand:SI 4 "const_0_to_255_operand")]
+	  UNSPEC_VTERNLOG))]
+  "!reload_completed && vpternlog_redundant_operand_mask (operands) != 0"
+  [(set (match_dup 0)
+	(unspec:V
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)
+	   (match_dup 4)]
+	  UNSPEC_VTERNLOG))]
+  "substitute_vpternlog_operands (operands);")  ; rewrites operands[1..3]
+
 ;; There must be lots of other combinations like
 ;;
 ;; (any_logic:V
diff --git a/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c b/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c
new file mode 100644
index 00000000000..21051c6bba0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */
+/* { dg-final { scan-assembler-times {vpternlog[^\n\r]*\(%rdx\)} 2 } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i* a, __m512i* b, __m512i* c)
+{ /* ~B | ~C: the result does not depend on a[0].  */
+	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_B | ~_MM_TERNLOG_C);
+}
+
+__m512i g(__m512i* a, __m512i* b, __m512i* c)
+{ /* ~A | ~C: the result does not depend on b[0].  */
+	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_C);
+}
+
+__m512i h(__m512i* a, __m512i* b, __m512i* c)
+{ /* ~A | ~B: the result does not depend on c[0].  */
+	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_B);
+}
diff --git a/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c b/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c
new file mode 100644
index 00000000000..d70bbb02390
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa" 1 } } */
+/* { dg-final { scan-assembler "vpternlog.*zmm0.*zmm0.*zmm0" } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i* a, __m512i* b, __m512i* c)
+{ /* ~C: the result depends on c[0] only; a[0] and b[0] are ignored.  */
+	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_C);
+}
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-04 16:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-04 16:44 [gcc r14-2999] i386: eliminate redundant operands of VPTERNLOG Alexander Monakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).