commit 90e5056d53d680fcc47693a118bfd75f7af0b435 Author: Jeff Law Date: Tue Jan 27 16:32:06 2015 -0700 PR target/15184 * combine.c (try_combine): If I0 is a memory load and I3 a store to a related address, increase the "goodness" of doing a 4-insn combination with I0-I3. (make_field_assignment): Handle SUBREGs in the ior+and case. PR target/15184 * gcc.target/i386/pr15184-1.c: New test. * gcc.target/i386/pr15184-2.c: New test. diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7424d9f..0c02d43 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2015-01-29 Jeff Law + + PR target/15184 + * combine.c (try_combine): If I0 is a memory load and I3 a store + to a related address, increase the "goodness" of doing a 4-insn + combination with I0-I3. + (make_field_assignment): Handle SUBREGs in the ior+and case. + 2015-01-29 Yuri Rumyantsev PR tree-optimization/64746 diff --git a/gcc/combine.c b/gcc/combine.c index 5c763b4..24418df 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -2620,6 +2620,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, int i; int ngood = 0; int nshift = 0; + rtx set0, set3; if (!flag_expensive_optimizations) return 0; @@ -2643,6 +2644,34 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, || GET_CODE (src) == LSHIFTRT) nshift++; } + + /* If I0 loads a memory and I3 sets the same memory, then I2 and I3 + are likely manipulating its value. Ideally we'll be able to combine + all four insns into a bitfield insertion of some kind. + + Note the source in I0 might be inside a sign/zero extension and the + memory modes in I0 and I3 might be different. So extract the address + from the destination of I3 and search for it in the source of I0. + + In the event that there's a match but the source/dest do not actually + refer to the same memory, the worst that happens is we try some + combinations that we wouldn't have otherwise. */ + if ((set0 = single_set (i0)) + /* Ensure the source of SET0 is a MEM, possibly buried inside + an extension. */ + && (GET_CODE (SET_SRC (set0)) == MEM + || ((GET_CODE (SET_SRC (set0)) == ZERO_EXTEND + || GET_CODE (SET_SRC (set0)) == SIGN_EXTEND) + && GET_CODE (XEXP (SET_SRC (set0), 0)) == MEM)) + && (set3 = single_set (i3)) + /* Ensure the destination of SET3 is a MEM. */ + && GET_CODE (SET_DEST (set3)) == MEM + /* Would it be better to extract the base address for the MEM + in SET3 and look for that? I don't have cases where it matters + but I could envision such cases. */ + && rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0))) + ngood += 2; + if (ngood < 2 && nshift < 2) return 0; } @@ -9272,6 +9301,13 @@ make_field_assignment (rtx x) to the appropriate position, force it to the required mode, and make the extraction. Check for the AND in both operands. */ + /* One or more SUBREGs might obscure the constant-position field + assignment. The first one we are likely to encounter is an outer + narrowing SUBREG, which we can just strip for the purposes of + identifying the constant-field assignment. */ + if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)) + src = SUBREG_REG (src); + if (GET_CODE (src) != IOR && GET_CODE (src) != XOR) return x; @@ -9282,10 +9318,38 @@ make_field_assignment (rtx x) && CONST_INT_P (XEXP (rhs, 1)) && rtx_equal_for_field_assignment_p (XEXP (rhs, 0), dest)) c1 = INTVAL (XEXP (rhs, 1)), other = lhs; + /* The second SUBREG that might get in the way is a paradoxical + SUBREG around the first operand of the AND. We want to + pretend the operand is as wide as the destination here. We + do this by creating a new MEM in the wider mode for the sole + purpose of the call to rtx_equal_for_field_assignment_p. Also + note this trick only works for MEMs. */ + else if (GET_CODE (rhs) == AND + && paradoxical_subreg_p (XEXP (rhs, 0)) + && GET_CODE (SUBREG_REG (XEXP (rhs, 0))) == MEM + && CONST_INT_P (XEXP (rhs, 1)) + && rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest), + XEXP (SUBREG_REG (XEXP (rhs, 0)), 0)), + dest)) + c1 = INTVAL (XEXP (rhs, 1)), other = lhs; else if (GET_CODE (lhs) == AND && CONST_INT_P (XEXP (lhs, 1)) && rtx_equal_for_field_assignment_p (XEXP (lhs, 0), dest)) c1 = INTVAL (XEXP (lhs, 1)), other = rhs; + /* The second SUBREG that might get in the way is a paradoxical + SUBREG around the first operand of the AND. We want to + pretend the operand is as wide as the destination here. We + do this by creating a new MEM in the wider mode for the sole + purpose of the call to rtx_equal_for_field_assignment_p. Also + note this trick only works for MEMs. */ + else if (GET_CODE (lhs) == AND + && paradoxical_subreg_p (XEXP (lhs, 0)) + && GET_CODE (SUBREG_REG (XEXP (lhs, 0))) == MEM + && CONST_INT_P (XEXP (lhs, 1)) + && rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest), + XEXP (SUBREG_REG (XEXP (lhs, 0)), 0)), + dest)) + c1 = INTVAL (XEXP (lhs, 1)), other = rhs; else return x; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 69488ec..c60230f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-01-29 Jeff Law + + PR target/15184 + * gcc.target/i386/pr15184-1.c: New test. + * gcc.target/i386/pr15184-2.c: New test. + 2015-01-29 Yuri Rumyantsev PR tree-optimization/64746 diff --git a/gcc/testsuite/gcc.target/i386/pr15184-1.c b/gcc/testsuite/gcc.target/i386/pr15184-1.c new file mode 100644 index 0000000..9eb544c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr15184-1.c @@ -0,0 +1,33 @@ +/* PR 15184 first two tests, plus two addition ones. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -m32 -march=pentiumpro" } */ + +#define regparm __attribute__((__regparm__(3))) + +extern unsigned int x; +extern unsigned short y; + +void regparm f0(unsigned char c) +{ + x = (x & 0xFFFFFF00) | (unsigned int)c; +} + +void regparm f1(unsigned char c) +{ + x = (x & 0xFFFF00FF) | ((unsigned int)c << 8); +} + +void regparm f2(unsigned char c) +{ + x = (x & 0xFF00FFFF) | ((unsigned int)c << 16); +} +void regparm f3(unsigned char c) +{ + x = (x & 0x00FFFFFF) | ((unsigned int)c << 24); +} + + +/* Each function should compile down to a byte move from + the input register into x, possibly at an offset within x. */ +/* { dg-final { scan-assembler-times "\tmovb\t%al, x" 4 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr15184-2.c b/gcc/testsuite/gcc.target/i386/pr15184-2.c new file mode 100644 index 0000000..99fdbc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr15184-2.c @@ -0,0 +1,23 @@ +/* PR 15184 second two tests +/* { dg-do compile } */ +/* { dg-options "-O2 -m32 -march=pentiumpro" } */ + +#define regparm __attribute__((__regparm__(3))) + +extern unsigned int x; +extern unsigned short y; + +void regparm g0(unsigned char c) +{ + y = (y & 0xFF00) | (unsigned short)c; +} + +void regparm g1(unsigned char c) +{ + y = (y & 0x00FF) | ((unsigned short)c << 8); +} + +/* Each function should compile down to a byte move from + the input register into x, possibly at an offset within x. */ +/* { dg-final { scan-assembler-times "\tmovb\t%al, y" 2 } } */ +