* Fix PR rtl-optimization/68910
@ 2015-12-19 22:03 Eric Botcazou
0 siblings, 0 replies; only message in thread
From: Eric Botcazou @ 2015-12-19 22:03 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2805 bytes --]
This is a code quality regression present on the mainline and 5 branch, and
visible on the SPARC but the underlying issue is very likely present for all
platforms still using reload. The compiler generates useless memory clearing
instructions because the DSE2 pass doesn't eliminate them any more since there
are USEs associated with the memory locations in the RTL.
These USEs come from a trick in find_reloads:
/* If we made a MEM to load (a part of) the stackslot of a pseudo
that didn't get a hard register, emit a USE with a REG_EQUAL
note in front so that we might inherit a previous, possibly
wider reload. */
if (replace
&& MEM_P (op)
&& REG_P (reg)
&& (GET_MODE_SIZE (GET_MODE (reg))
>= GET_MODE_SIZE (GET_MODE (op)))
&& reg_equiv_constant (REGNO (reg)) == 0)
set_unique_reg_note (emit_insn_before (gen_rtx_USE (VOIDmode, reg),
insn),
REG_EQUAL, reg_equiv_memory_loc (REGNO (reg)));
and are supposed to be eliminated at the end of the pass:
/* Make a pass over all the insns and delete all USEs which we inserted
only to tag a REG_EQUAL note on them. Remove all REG_DEAD and REG_UNUSED
notes. Delete all CLOBBER insns, except those that refer to the return
value and the special mem:BLK CLOBBERs added to prevent the scheduler
from misarranging variable-array code, and simplify (subreg (reg))
operands. Strip and regenerate REG_INC notes that may have been moved
around. */
Unfortunately set_unique_reg_note was enhanced to disregard notes that don't
make sense and the new check triggers here, which means that naked USEs are
generated instead of USEs + REG_EQUAL notes, which in turn means that they are
not deleted at the end of the pass. :-(
Fixed by adding a bypass for this case in set_unique_reg_note. The patch also
removes useless DImode logical patterns in the SPARC back-end, which serve no
useful purpose but pessimize register allocation in 32-bit mode by preventing
DImode logical operations from being split early.
Bootstrapped/regtested on SPARC/Solaris, applied on mainline and 5 branch.
2015-12-19 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/68910
* emit-rtl.c (set_unique_reg_note) <>REG_EQUAL>: Add bypass for USEs.
* config/sparc/sparc.md (anddi3): Enable only in 64-bit mode.
(iordi3): Likewise.
(xordi3): Likewise.
(one_cmpldi2): Likewise.
(*anddi3_sp32): Delete.
(*and_not_di_sp32): Likewise.
(*iordi3_sp32): Likewise.
(*or_not_di_sp32): Likewise.
(*xordi3_sp32): Likewise.
(*xor_not_di_sp32): Likewise.
(32-bit DImode logical operations splitter): Likewise.
(*one_cmpldi2_sp32): Likewise.
2015-12-19 Eric Botcazou <ebotcazou@adacore.com>
* gcc.target/sparc/20151219-1.c: New test.
--
Eric Botcazou
[-- Attachment #2: pr68910.diff --]
[-- Type: text/x-patch, Size: 9507 bytes --]
Index: emit-rtl.c
===================================================================
--- emit-rtl.c (revision 231605)
+++ emit-rtl.c (working copy)
@@ -5239,7 +5239,8 @@ set_unique_reg_note (rtx insn, enum reg_
{
case REG_EQUAL:
case REG_EQUIV:
- if (!set_for_reg_notes (insn))
+ /* We need to support the REG_EQUAL on USE trick of find_reloads. */
+ if (!set_for_reg_notes (insn) && GET_CODE (PATTERN (insn)) != USE)
return NULL_RTX;
/* Don't add ASM_OPERAND REG_EQUAL/REG_EQUIV notes.
Index: config/sparc/sparc.md
===================================================================
--- config/sparc/sparc.md (revision 231605)
+++ config/sparc/sparc.md (working copy)
@@ -4740,24 +4740,7 @@ (define_insn "*umacdi"
;; Boolean instructions.
-;; We define DImode `and' so with DImode `not' we can get
-;; DImode `andn'. Other combinations are possible.
-
-(define_expand "anddi3"
- [(set (match_operand:DI 0 "register_operand" "")
- (and:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
- ""
- "")
-
-(define_insn "*anddi3_sp32"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
- "! TARGET_ARCH64"
- "#")
-
-(define_insn "*anddi3_sp64"
+(define_insn "anddi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(and:DI (match_operand:DI 1 "arith_operand" "%r")
(match_operand:DI 2 "arith_operand" "rI")))]
@@ -4783,28 +4766,6 @@ (define_split
operands[4] = GEN_INT (~INTVAL (operands[2]));
})
-(define_insn_and_split "*and_not_di_sp32"
- [(set (match_operand:DI 0 "register_operand" "=&r")
- (and:DI (not:DI (match_operand:DI 1 "register_operand" "%r"))
- (match_operand:DI 2 "register_operand" "r")))]
- "! TARGET_ARCH64"
- "#"
- "&& reload_completed
- && ((GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
- [(set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))
- (set (match_dup 6) (and:SI (not:SI (match_dup 7)) (match_dup 8)))]
- "operands[3] = gen_highpart (SImode, operands[0]);
- operands[4] = gen_highpart (SImode, operands[1]);
- operands[5] = gen_highpart (SImode, operands[2]);
- operands[6] = gen_lowpart (SImode, operands[0]);
- operands[7] = gen_lowpart (SImode, operands[1]);
- operands[8] = gen_lowpart (SImode, operands[2]);"
- [(set_attr "length" "2")])
-
(define_insn "*and_not_di_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(and:DI (not:DI (match_operand:DI 1 "register_operand" "%r"))
@@ -4819,22 +4780,7 @@ (define_insn "*and_not_si"
""
"andn\t%2, %1, %0")
-(define_expand "iordi3"
- [(set (match_operand:DI 0 "register_operand" "")
- (ior:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
- ""
- "")
-
-(define_insn "*iordi3_sp32"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ior:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
- "! TARGET_ARCH64"
- "#"
- [(set_attr "length" "2")])
-
-(define_insn "*iordi3_sp64"
+(define_insn "iordi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (match_operand:DI 1 "arith_operand" "%r")
(match_operand:DI 2 "arith_operand" "rI")))]
@@ -4860,28 +4806,6 @@ (define_split
operands[4] = gen_int_mode (~INTVAL (operands[2]), SImode);
})
-(define_insn_and_split "*or_not_di_sp32"
- [(set (match_operand:DI 0 "register_operand" "=&r")
- (ior:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
- (match_operand:DI 2 "register_operand" "r")))]
- "! TARGET_ARCH64"
- "#"
- "&& reload_completed
- && ((GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
- [(set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))
- (set (match_dup 6) (ior:SI (not:SI (match_dup 7)) (match_dup 8)))]
- "operands[3] = gen_highpart (SImode, operands[0]);
- operands[4] = gen_highpart (SImode, operands[1]);
- operands[5] = gen_highpart (SImode, operands[2]);
- operands[6] = gen_lowpart (SImode, operands[0]);
- operands[7] = gen_lowpart (SImode, operands[1]);
- operands[8] = gen_lowpart (SImode, operands[2]);"
- [(set_attr "length" "2")])
-
(define_insn "*or_not_di_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
@@ -4896,22 +4820,7 @@ (define_insn "*or_not_si"
""
"orn\t%2, %1, %0")
-(define_expand "xordi3"
- [(set (match_operand:DI 0 "register_operand" "")
- (xor:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
- ""
- "")
-
-(define_insn "*xordi3_sp32"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (xor:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
- "! TARGET_ARCH64"
- "#"
- [(set_attr "length" "2")])
-
-(define_insn "*xordi3_sp64"
+(define_insn "xordi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(xor:DI (match_operand:DI 1 "arith_operand" "%rJ")
(match_operand:DI 2 "arith_operand" "rI")))]
@@ -4949,54 +4858,6 @@ (define_split
operands[4] = gen_int_mode (~INTVAL (operands[2]), SImode);
})
-;; Split DImode logical operations requiring two instructions.
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (match_operator:DI 1 "cc_arith_operator" ; AND, IOR, XOR
- [(match_operand:DI 2 "register_operand" "")
- (match_operand:DI 3 "arith_double_operand" "")]))]
- "! TARGET_ARCH64
- && reload_completed
- && ((GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
- [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
- (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
-{
- operands[4] = gen_highpart (SImode, operands[0]);
- operands[5] = gen_lowpart (SImode, operands[0]);
- operands[6] = gen_highpart (SImode, operands[2]);
- operands[7] = gen_lowpart (SImode, operands[2]);
- operands[8] = gen_highpart_mode (SImode, DImode, operands[3]);
- operands[9] = gen_lowpart (SImode, operands[3]);
-})
-
-;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b).
-;; Combine now canonicalizes to the rightmost expression.
-(define_insn_and_split "*xor_not_di_sp32"
- [(set (match_operand:DI 0 "register_operand" "=&r")
- (not:DI (xor:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "register_operand" "r"))))]
- "! TARGET_ARCH64"
- "#"
- "&& reload_completed
- && ((GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
- [(set (match_dup 3) (not:SI (xor:SI (match_dup 4) (match_dup 5))))
- (set (match_dup 6) (not:SI (xor:SI (match_dup 7) (match_dup 8))))]
- "operands[3] = gen_highpart (SImode, operands[0]);
- operands[4] = gen_highpart (SImode, operands[1]);
- operands[5] = gen_highpart (SImode, operands[2]);
- operands[6] = gen_lowpart (SImode, operands[0]);
- operands[7] = gen_lowpart (SImode, operands[1]);
- operands[8] = gen_lowpart (SImode, operands[2]);"
- [(set_attr "length" "2")])
-
(define_insn "*xor_not_di_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(not:DI (xor:DI (match_operand:DI 1 "register_or_zero_operand" "rJ")
@@ -5245,34 +5106,7 @@ (define_insn "*cmp_ccx_set_neg"
"subcc\t%%g0, %1, %0"
[(set_attr "type" "compare")])
-;; We cannot use the "not" pseudo insn because the Sun assembler
-;; does not know how to make it work for constants.
-(define_expand "one_cmpldi2"
- [(set (match_operand:DI 0 "register_operand" "")
- (not:DI (match_operand:DI 1 "register_operand" "")))]
- ""
- "")
-
-(define_insn_and_split "*one_cmpldi2_sp32"
- [(set (match_operand:DI 0 "register_operand" "=&r")
- (not:DI (match_operand:DI 1 "register_operand" "r")))]
- "! TARGET_ARCH64"
- "#"
- "&& reload_completed
- && ((GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
- [(set (match_dup 2) (not:SI (xor:SI (match_dup 3) (const_int 0))))
- (set (match_dup 4) (not:SI (xor:SI (match_dup 5) (const_int 0))))]
- "operands[2] = gen_highpart (SImode, operands[0]);
- operands[3] = gen_highpart (SImode, operands[1]);
- operands[4] = gen_lowpart (SImode, operands[0]);
- operands[5] = gen_lowpart (SImode, operands[1]);"
- [(set_attr "length" "2")])
-
-(define_insn "*one_cmpldi2_sp64"
+(define_insn "one_cmpldi2"
[(set (match_operand:DI 0 "register_operand" "=r")
(not:DI (match_operand:DI 1 "arith_operand" "rI")))]
"TARGET_ARCH64"
[-- Attachment #3: 20151219-1.c --]
[-- Type: text/x-csrc, Size: 3536 bytes --]
/* PR rtl-optimization/68910 */
/* Reported by Sebastian Huber <sebastian.huber@embedded-brains.de> */
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=supersparc" } */
typedef unsigned int size_t;
typedef long long unsigned int uint64_t;
extern void *memcpy (void *, const void *, size_t);
void
SHA512_Transform(uint64_t * state, const unsigned char block[128])
{
uint64_t W[80];
uint64_t S[8];
uint64_t t0, t1;
int i;
memcpy ((void *)W, (const void *)block, (size_t)128);
for (i = 16; i < 80; i++)
W[i] = (((W[i - 2] >> 19) | (W[i - 2] << (64 - 19))) ^ ((W[i - 2] >> 61) | (W[i - 2] << (64 - 61))) ^ (W[i - 2] >> 6)) + W[i - 7] + (((W[i - 15] >> 1) | (W[i - 15] << (64 - 1))) ^ ((W[i - 15] >> 8) | (W[i - 15] << (64 - 8))) ^ (W[i - 15] >> 7)) + W[i - 16];
memcpy (S, state, 64);
t0 = S[(87 - 0) % 8] + (((S[(84 - 0) % 8] >> 14) | (S[(84 - 0) % 8] << (64 - 14))) ^ ((S[(84 - 0) % 8] >> 18) | (S[(84 - 0) % 8] << (64 - 18))) ^ ((S[(84 - 0) % 8] >> 41) | (S[(84 - 0) % 8] << (64 - 41)))) + ((S[(84 - 0) % 8] & (S[(85 - 0) % 8] ^ S[(86 - 0) % 8])) ^ S[(86 - 0) % 8]) + W[0] + 0x428a2f98d728ae22ULL; t1 = (((S[(80 - 0) % 8] >> 28) | (S[(80 - 0) % 8] << (64 - 28))) ^ ((S[(80 - 0) % 8] >> 34) | (S[(80 - 0) % 8] << (64 - 34))) ^ ((S[(80 - 0) % 8] >> 39) | (S[(80 - 0) % 8] << (64 - 39)))) + ((S[(80 - 0) % 8] & (S[(81 - 0) % 8] | S[(82 - 0) % 8])) | (S[(81 - 0) % 8] & S[(82 - 0) % 8])); S[(83 - 0) % 8] += t0; S[(87 - 0) % 8] = t0 + t1;
t0 = S[(87 - 1) % 8] + (((S[(84 - 1) % 8] >> 14) | (S[(84 - 1) % 8] << (64 - 14))) ^ ((S[(84 - 1) % 8] >> 18) | (S[(84 - 1) % 8] << (64 - 18))) ^ ((S[(84 - 1) % 8] >> 41) | (S[(84 - 1) % 8] << (64 - 41)))) + ((S[(84 - 1) % 8] & (S[(85 - 1) % 8] ^ S[(86 - 1) % 8])) ^ S[(86 - 1) % 8]) + W[1] + 0x7137449123ef65cdULL; t1 = (((S[(80 - 1) % 8] >> 28) | (S[(80 - 1) % 8] << (64 - 28))) ^ ((S[(80 - 1) % 8] >> 34) | (S[(80 - 1) % 8] << (64 - 34))) ^ ((S[(80 - 1) % 8] >> 39) | (S[(80 - 1) % 8] << (64 - 39)))) + ((S[(80 - 1) % 8] & (S[(81 - 1) % 8] | S[(82 - 1) % 8])) | (S[(81 - 1) % 8] & S[(82 - 1) % 8])); S[(83 - 1) % 8] += t0; S[(87 - 1) % 8] = t0 + t1;
t0 = S[(87 - 2) % 8] + (((S[(84 - 2) % 8] >> 14) | (S[(84 - 2) % 8] << (64 - 14))) ^ ((S[(84 - 2) % 8] >> 18) | (S[(84 - 2) % 8] << (64 - 18))) ^ ((S[(84 - 2) % 8] >> 41) | (S[(84 - 2) % 8] << (64 - 41)))) + ((S[(84 - 2) % 8] & (S[(85 - 2) % 8] ^ S[(86 - 2) % 8])) ^ S[(86 - 2) % 8]) + W[2] + 0xb5c0fbcfec4d3b2fULL; t1 = (((S[(80 - 2) % 8] >> 28) | (S[(80 - 2) % 8] << (64 - 28))) ^ ((S[(80 - 2) % 8] >> 34) | (S[(80 - 2) % 8] << (64 - 34))) ^ ((S[(80 - 2) % 8] >> 39) | (S[(80 - 2) % 8] << (64 - 39)))) + ((S[(80 - 2) % 8] & (S[(81 - 2) % 8] | S[(82 - 2) % 8])) | (S[(81 - 2) % 8] & S[(82 - 2) % 8])); S[(83 - 2) % 8] += t0; S[(87 - 2) % 8] = t0 + t1;
t0 = S[(87 - 3) % 8] + (((S[(84 - 3) % 8] >> 14) | (S[(84 - 3) % 8] << (64 - 14))) ^ ((S[(84 - 3) % 8] >> 18) | (S[(84 - 3) % 8] << (64 - 18))) ^ ((S[(84 - 3) % 8] >> 41) | (S[(84 - 3) % 8] << (64 - 41)))) + ((S[(84 - 3) % 8] & (S[(85 - 3) % 8] ^ S[(86 - 3) % 8])) ^ S[(86 - 3) % 8]) + W[3] + 0xe9b5dba58189dbbcULL; t1 = (((S[(80 - 3) % 8] >> 28) | (S[(80 - 3) % 8] << (64 - 28))) ^ ((S[(80 - 3) % 8] >> 34) | (S[(80 - 3) % 8] << (64 - 34))) ^ ((S[(80 - 3) % 8] >> 39) | (S[(80 - 3) % 8] << (64 - 39)))) + ((S[(80 - 3) % 8] & (S[(81 - 3) % 8] | S[(82 - 3) % 8])) | (S[(81 - 3) % 8] & S[(82 - 3) % 8])); S[(83 - 3) % 8] += t0; S[(87 - 3) % 8] = t0 + t1;
for (i = 0; i < 8; i++)
state[i] += S[i];
}
/* { dg-final { scan-assembler-not "stx\t%" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-12-19 22:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-19 22:03 Fix PR rtl-optimization/68910 Eric Botcazou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).