public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4928] i386: Narrow test instructions with immediate operands [PR111698]
@ 2023-10-25 14:28 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-10-25 14:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:678e6c328c77db383431751bcfcf867b02369bd6

commit r14-4928-g678e6c328c77db383431751bcfcf867b02369bd6
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Wed Oct 25 16:26:57 2023 +0200

    i386: Narrow test instructions with immediate operands [PR111698]
    
    Narrow test instructions with immediate operands that test memory locations
    for zero.  E.g. testl $0x00aa0000, mem can be converted to testb $0xaa, mem+2.
    Reject targets where reading a (possibly unaligned) part of a memory location
    after a large write to the same address causes a store-to-load forwarding stall.
    
            PR target/111698
    
    gcc/ChangeLog:
    
            * config/i386/x86-tune.def (X86_TUNE_PARTIAL_MEMORY_READ_STALL):
            New tune.
            * config/i386/i386.h (TARGET_PARTIAL_MEMORY_READ_STALL): New macro.
            * config/i386/i386.md: New peephole pattern to narrow test
            instructions with immediate operands that test memory locations
            for zero.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr111698.c: New test.

Diff:
---
 gcc/config/i386/i386.h                   |  2 ++
 gcc/config/i386/i386.md                  | 51 ++++++++++++++++++++++++++++++++
 gcc/config/i386/x86-tune.def             |  8 +++++
 gcc/testsuite/gcc.target/i386/pr111698.c | 19 ++++++++++++
 4 files changed, 80 insertions(+)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7e66fa095855..a9f07223dcad 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -311,6 +311,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_SAHF		ix86_tune_features[X86_TUNE_USE_SAHF]
 #define TARGET_MOVX		ix86_tune_features[X86_TUNE_MOVX]
 #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
+#define TARGET_PARTIAL_MEMORY_READ_STALL \
+	ix86_tune_features[X86_TUNE_PARTIAL_MEMORY_READ_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
 	ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
 #define TARGET_LCP_STALL \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index abaf2f311e89..732c7a0323e9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11115,6 +11115,57 @@
   operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
 })
 
+;; Narrow test instructions with immediate operands that test
+;; memory locations for zero.  E.g. testl $0x00aa0000, mem can be
+;; converted to testb $0xaa, mem+2.  Reject volatile locations and
+;; targets where reading (possibly unaligned) part of memory
+;; location after a large write to the same address causes
+;; store-to-load forwarding stall.
+(define_peephole2
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:SWI248 (match_operand:SWI248 0 "memory_operand")
+		      (match_operand 1 "const_int_operand"))
+	  (const_int 0)))]
+  "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_dup 2) (const_int 0)))]
+{
+  unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
+  int first_nonzero_byte, bitsize;
+  rtx new_addr, new_const;
+  machine_mode new_mode;
+
+  if (ival == 0)
+    FAIL;
+
+  /* Clear bits outside mode width.  */
+  ival &= GET_MODE_MASK (<MODE>mode);
+  /* Byte offset of the lowest byte that has a bit set.  */
+  first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
+  /* Discard the all-zero low bytes below that offset.  */
+  ival >>= first_nonzero_byte * BITS_PER_UNIT;
+  /* Bit width of what remains, up to its highest set bit.  */
+  bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
+  /* Choose the narrowest of QI/HI/SI/DImode that holds BITSIZE bits.  */
+  if (bitsize <= GET_MODE_BITSIZE (QImode))
+    new_mode = QImode;
+  else if (bitsize <= GET_MODE_BITSIZE (HImode))
+    new_mode = HImode;
+  else if (bitsize <= GET_MODE_BITSIZE (SImode))
+    new_mode = SImode;
+  else
+    new_mode = DImode;
+  /* FAIL unless the new access is strictly narrower than the original.  */
+  if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
+    FAIL;
+  /* Address the selected bytes and express the constant in NEW_MODE.  */
+  new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
+  new_const = gen_int_mode (ival, new_mode);
+  /* Operand 2 is what (match_dup 2) in the replacement refers to.  */
+  operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
+})
+
 ;; %%% This used to optimize known byte-wide and operations to memory,
 ;; and sometimes to QImode registers.  If this is considered useful,
 ;; it should be done with splitters.
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 53e177a4d97a..9b43588353d4 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -647,6 +647,14 @@ DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT | m_LAKEMONT)
    and can happen in caller/callee saving sequences.  */
 DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO)
 
+/* X86_TUNE_PARTIAL_MEMORY_READ_STALL: Reading (possibly unaligned) part of
+   memory location after a large write to the same address causes
+   store-to-load forwarding stall.  */
+DEF_TUNE (X86_TUNE_PARTIAL_MEMORY_READ_STALL, "partial_memory_read_stall",
+	  m_386 | m_486 | m_PENT | m_LAKEMONT | m_PPRO | m_P4_NOCONA | m_CORE2
+	   | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
+	   | m_K6_GEODE | m_ATHLON_K8 | m_AMDFAM10)
+
 /* X86_TUNE_PROMOTE_QIMODE: When it is cheap, turn 8bit arithmetic to
    corresponding 32bit arithmetic.  */
 DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode",
diff --git a/gcc/testsuite/gcc.target/i386/pr111698.c b/gcc/testsuite/gcc.target/i386/pr111698.c
new file mode 100644
index 000000000000..2da6be531a2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111698.c
@@ -0,0 +1,19 @@
+/* PR target/111698 */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "testl" } }  */
+
+int m;
+/* All set bits of 0x0a0000 lie in byte 2 of the mask.  */
+_Bool foo (void)
+{
+  return m & 0x0a0000;
+}
+
+/* { dg-final { scan-assembler-times "testb" 1 } }  */
+/* The set bits of 0xa0a000 span bytes 1 and 2 of the mask.  */
+_Bool bar (void)
+{
+  return m & 0xa0a000;
+}
+
+/* { dg-final { scan-assembler-times "testw" 1 } }  */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-25 14:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-25 14:28 [gcc r14-4928] i386: Narrow test instructions with immediate operands [PR111698] Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).