diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e4c1fc6eef0..4426b27f4fe 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -311,6 +311,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_SAHF		ix86_tune_features[X86_TUNE_USE_SAHF]
 #define TARGET_MOVX		ix86_tune_features[X86_TUNE_MOVX]
 #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
+#define TARGET_PARTIAL_MEMORY_READ_STALL \
+	ix86_tune_features[X86_TUNE_PARTIAL_MEMORY_READ_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
 	ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
 #define TARGET_LCP_STALL \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f90cf1ca734..5d8d5b2eae6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11100,6 +11100,64 @@ (define_split
   operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
 })
 
+;; Narrow test instructions with immediate operands that test
+;; memory locations for zero.  E.g. testl $0x00aa0000, mem can be
+;; converted to testb $0xaa, mem+2.  Reject volatile locations and
+;; targets where reading (possibly unaligned) part of memory
+;; location after a large write to the same address causes
+;; store-to-load forwarding stall.
+(define_peephole2
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:SWI248 (match_operand:SWI248 0 "memory_operand")
+		      (match_operand 1 "const_int_operand"))
+	  (const_int 0)))]
+  "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_dup 2) (const_int 0)))]
+{
+  unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
+  int first_nonzero_byte, bitsize;
+  rtx new_addr, new_const;
+  machine_mode new_mode;
+
+  /* Clear bits outside mode width.  */
+  ival &= GET_MODE_MASK (<MODE>mode);
+
+  /* Nothing to narrow when no bit inside the mode is tested; checking
+     after masking guarantees a set bit for ctz_hwi/clz_hwi below.  */
+  if (ival == 0)
+    FAIL;
+
+  /* Byte offset of the lowest byte that has a tested bit set.  */
+  first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
+
+  /* Discard the all-zero low bytes; the address is advanced to match.  */
+  ival >>= first_nonzero_byte * BITS_PER_UNIT;
+
+  /* Number of significant bits left in the shifted constant.  */
+  bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
+
+  /* Choose the narrowest integer mode that holds all remaining bits.  */
+  if (bitsize <= GET_MODE_BITSIZE (QImode))
+    new_mode = QImode;
+  else if (bitsize <= GET_MODE_BITSIZE (HImode))
+    new_mode = HImode;
+  else if (bitsize <= GET_MODE_BITSIZE (SImode))
+    new_mode = SImode;
+  else
+    new_mode = DImode;
+
+  /* Give up unless the access actually becomes narrower.  */
+  if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
+    FAIL;
+
+  new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
+  new_const = gen_int_mode (ival, new_mode);
+
+  operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
+})
+
 ;; %%% This used to optimize known byte-wide and operations to memory,
 ;; and sometimes to QImode registers.  If this is considered useful,
 ;; it should be done with splitters.
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 3636a4a95d8..9d0699ff9b9 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -658,6 +658,14 @@ DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT | m_LAKEMONT)
    and can happen in caller/callee saving sequences.  */
 DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO)
 
+/* X86_TUNE_PARTIAL_MEMORY_READ_STALL: Reading (possibly unaligned) part of
+   memory location after a large write to the same address causes
+   store-to-load forwarding stall.  */
+DEF_TUNE (X86_TUNE_PARTIAL_MEMORY_READ_STALL, "partial_memory_read_stall",
+	  m_386 | m_486 | m_PENT | m_LAKEMONT | m_PPRO | m_P4_NOCONA | m_CORE2
+	  | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
+	  | m_K6_GEODE | m_ATHLON_K8 | m_AMDFAM10)
+
 /* X86_TUNE_PROMOTE_QIMODE: When it is cheap, turn 8bit arithmetic to
    corresponding 32bit arithmetic.  */
 DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode",
diff --git a/gcc/testsuite/gcc.target/i386/pr111698.c b/gcc/testsuite/gcc.target/i386/pr111698.c
new file mode 100644
index 00000000000..2da6be531a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111698.c
@@ -0,0 +1,19 @@
+/* PR target/111698 */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "testl" } } */
+
+int m;
+
+_Bool foo (void)
+{
+  return m & 0x0a0000;
+}
+
+/* { dg-final { scan-assembler-times "testb" 1 } } */
+
+_Bool bar (void)
+{
+  return m & 0xa0a000;
+}
+
+/* { dg-final { scan-assembler-times "testw" 1 } } */