* [committed] i386: Apply LRA reload workaround to insns with high registers [PR82524]
@ 2023-11-08 20:59 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-11-08 20:59 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 4240 bytes --]
LRA is not able to reload a zero_extracted in-out operand with a matched
input operand in the same way as a strict_low_part in-out operand. The patch
applies the strict_low_part workaround to the zero_extracted in-out operand
case as well: we allow LRA to generate an instruction with a non-matched
input operand, which is split after reload into an instruction that inserts
the non-matched input operand into the in-out operand, followed by the
instruction that uses the matched operand.
The generated code from the pr82524.c testcase improves from:
movl %esi, %ecx
movl %edi, %eax
movsbl %ch, %esi
addl %esi, %edx
movb %dl, %ah
to:
movl %edi, %eax
movl %esi, %ecx
movb %ch, %ah
addb %dl, %ah
The compiler is now also able to handle non-commutative operations:
movl %edi, %eax
movl %esi, %ecx
movb %ch, %ah
subb %dl, %ah
and unary operations:
movl %edi, %eax
movl %esi, %edx
movb %dh, %ah
negb %ah
The patch also makes the split conditions of the splitters more robust, to
ensure that only alternatives with unmatched operands are split.
PR target/82524
gcc/ChangeLog:
* config/i386/i386.md (*add<mode>_1_slp):
Split insn only for unmatched operand 0.
(*sub<mode>_1_slp): Ditto.
(*<any_logic:code><mode>_1_slp): Merge pattern from "*and<mode>_1_slp"
and "*<any_or:code><mode>_1_slp" using any_logic code iterator.
Split insn only for unmatched operand 0.
(*neg<mode>_1_slp): Split insn only for unmatched operand 0.
(*one_cmpl<mode>_1_slp): Ditto.
(*ashl<mode>3_1_slp): Ditto.
(*<any_shiftrt:insn><mode>3_1_slp): Ditto.
(*<any_rotate:insn><mode>3_1_slp): Ditto.
(*addqi_ext<mode>_1): Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<plusminus:insn>qi_ext<mode>_2): Merge pattern from
"*addqi_ext<mode>_2" and "*subqi_ext<mode>_2" using plusminus code
iterator. Redefine as define_insn_and_split. Add alternative 1
and split insn after reload for unmatched operand 0.
(*subqi_ext<mode>_1): Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_0): Merge pattern from
"*andqi_ext<mode>_0" and "*<any_or:code>qi_ext<mode>_0" using
any_logic code iterator.
(*<any_logic:code>qi_ext<mode>_1): Merge pattern from
"*andqi_ext<mode>_1" and "*<any_or:code>qi_ext<mode>_1" using
any_logic code iterator. Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_1_cc): Merge pattern from
"*andqi_ext<mode>_1_cc" and "*xorqi_ext<mode>_1_cc" using any_logic
code iterator. Redefine as define_insn_and_split. Add alternative 1
and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_2): Merge pattern from
"*andqi_ext<mode>_2" and "*<any_or:code>qi_ext<mode>_2" using
any_logic code iterator. Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_3): Redefine as define_insn_and_split.
Add alternative 1 and split insn after reload for unmatched operand 0.
(*negqi_ext<mode>_1): Rename from "*negqi_ext<mode>_2". Add
alternative 1 and split insn after reload for unmatched operand 0.
(*one_cmplqi_ext<mode>_1): New insn_and_split pattern.
(*ashlqi_ext<mode>_1): Ditto.
(*<any_shiftrt:insn>qi_ext<mode>_1): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr78904-1.c (test_sub): New test.
* gcc.target/i386/pr78904-1a.c (test_sub): Ditto.
* gcc.target/i386/pr78904-1b.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2a.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2b.c (test_sub): Ditto.
* gcc.target/i386/pr78952-4.c (test_sub): Ditto.
* gcc.target/i386/pr82524.c: New test.
* gcc.target/i386/pr82524-1.c: New test.
* gcc.target/i386/pr82524-2.c: New test.
* gcc.target/i386/pr82524-3.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 38106 bytes --]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 99022990377..ce7102af44f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6596,7 +6596,9 @@ (define_insn_and_split "*add<mode>_1_slp"
return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
@@ -7001,38 +7003,58 @@ (define_expand "addqi_ext_1"
(match_operand:QI 2 "const_int_operand")) 0))
(clobber (reg:CC FLAGS_REG))])])
-(define_insn "*addqi_ext<mode>_1"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*addqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(plus:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)
- (match_operand:QI 2 "general_operand" "QnBn")) 0))
+ (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
+ ""
{
+ if (which_alternative)
+ return "#";
+
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{b}\t%h0";
else
- {
+ {
gcc_assert (operands[2] == constm1_rtx);
- return "dec{b}\t%h0";
- }
+ return "dec{b}\t%h0";
+ }
default:
return "add{b}\t{%2, %h0|%h0, %2}";
}
}
+ "reload_completed
+ && !rtx_equal_p (operands[0], operands[1])"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (plus:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "addr" "gpr8")
(set (attr "type")
(if_then_else (match_operand:QI 2 "incdec_operand")
@@ -7040,28 +7062,49 @@ (define_insn "*addqi_ext<mode>_1"
(const_string "alu")))
(set_attr "mode" "QI")])
-(define_insn "*addqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<insn>qi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
- (plus:QI
+ (plusminus:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "%0")
+ [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
(const_int 8)
(const_int 8)]) 0)
(subreg:QI
(match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
+ [(match_operand 2 "int248_register_operand" "Q,Q")
(const_int 8)
(const_int 8)]) 0)) 0))
- (clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])
- || rtx_equal_p (operands[0], operands[2])"
- "add{b}\t{%h2, %h0|%h0, %h2}"
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "@
+ <insn>{b}\t{%h2, %h0|%h0, %h2}
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1])
+ || (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (plusminus:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (subreg:QI
+ (match_op_dup 4
+ [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
@@ -7570,7 +7613,8 @@ (define_insn_and_split "*sub<mode>_1_slp"
"@
sub{<imodesuffix>}\t{%2, %0|%0, %2}
#"
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
@@ -7627,28 +7671,44 @@ (define_insn "*subqi_ext<mode>_0"
(set_attr "type" "alu")
(set_attr "mode" "QI")])
-(define_insn "*subqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*subqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(minus:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)
- (subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
- (clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
- "sub{b}\t{%h2, %h0|%h0, %h2}"
- [(set_attr "type" "alu")
+ (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "@
+ sub{b}\t{%2, %h0|%h0, %2}
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (minus:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "addr" "gpr8")
+ (set_attr "type" "alu")
(set_attr "mode" "QI")])
;; Subtract with jump on overflow.
@@ -11338,20 +11398,22 @@ (define_insn "*andqi_1"
(symbol_ref "true")))])
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
-(define_insn_and_split "*and<mode>_1_slp"
+(define_insn_and_split "*<code><mode>_1_slp"
[(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
- (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
- (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
+ (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
+ (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"@
- and{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
#"
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
- (and:SWI12 (match_dup 0) (match_dup 2)))
+ (any_logic:SWI12 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -11528,9 +11590,9 @@ (define_insn "*and<mode>_2"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*andqi_ext<mode>_0"
+(define_insn "*<code>qi_ext<mode>_0"
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
- (and:QI
+ (any_logic:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
[(match_operand 2 "int248_register_operand" "Q")
@@ -11539,7 +11601,7 @@ (define_insn "*andqi_ext<mode>_0"
(match_operand:QI 1 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
""
- "and{b}\t{%h2, %0|%0, %h2}"
+ "<logic>{b}\t{%h2, %0|%0, %h2}"
[(set_attr "addr" "gpr8")
(set_attr "type" "alu")
(set_attr "mode" "QI")])
@@ -11558,86 +11620,180 @@ (define_expand "andqi_ext_1"
(match_operand:QI 2 "const_int_operand")) 0))
(clobber (reg:CC FLAGS_REG))])])
-(define_insn "*andqi_ext<mode>_1"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
- (and:QI
+ (any_logic:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)
- (match_operand:QI 2 "general_operand" "QnBn")) 0))
+ (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
- "and{b}\t{%2, %h0|%h0, %2}"
+ ""
+ "@
+ <logic>{b}\t{%2, %h0|%h0, %2}
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (any_logic:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "addr" "gpr8")
(set_attr "type" "alu")
(set_attr "mode" "QI")])
-;; Generated by peephole translating test to and. This shows up
-;; often in fp comparisons.
-(define_insn "*andqi_ext<mode>_1_cc"
- [(set (reg FLAGS_REG)
- (compare
- (and:QI
- (subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
- (const_int 8)
- (const_int 8)]) 0)
- (match_operand:QI 2 "general_operand" "QnBn"))
- (const_int 0)))
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_1_cc"
+ [(set (match_operand 4 "flags_reg_operand")
+ (match_operator 5 "compare_operator"
+ [(any_logic:QI
+ (subreg:QI
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
+ (const_int 0)]))
(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
- (and:QI
+ (any_logic:QI
(subreg:QI
(match_op_dup 3
- [(match_dup 1)
- (const_int 8)
- (const_int 8)]) 0)
+ [(match_dup 1) (const_int 8) (const_int 8)]) 0)
(match_dup 2)) 0))]
- "ix86_match_ccmode (insn, CCNOmode)
- /* FIXME: without this LRA can't reload this pattern, see PR82524. */
- && rtx_equal_p (operands[0], operands[1])"
- "and{b}\t{%2, %h0|%h0, %2}"
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ <logic>{b}\t{%2, %h0|%h0, %2}
+ #"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (match_dup 4)
+ (match_op_dup 5
+ [(any_logic:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2))
+ (const_int 0)]))
+ (set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (any_logic:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))])]
+ ""
[(set_attr "addr" "gpr8")
(set_attr "type" "alu")
(set_attr "mode" "QI")])
-(define_insn "*andqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
- (and:QI
+ (any_logic:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "%0")
+ [(match_operand 1 "int248_register_operand" "%0,!Q")
(const_int 8)
(const_int 8)]) 0)
(subreg:QI
(match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
+ [(match_operand 2 "int248_register_operand" "Q,Q")
(const_int 8)
(const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])
- || rtx_equal_p (operands[0], operands[2])"
- "and{b}\t{%h2, %h0|%h0, %h2}"
+ ""
+ "@
+ <logic>{b}\t{%h2, %h0|%h0, %h2}
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (any_logic:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (subreg:QI
+ (match_op_dup 4
+ [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
-;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below.
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_3"
+ [(set (zero_extract:SWI248
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operator:SWI248 3 "extract_operator"
+ [(any_logic
+ (match_operand 1 "int248_register_operand" "%0,!Q")
+ (match_operand 2 "int248_register_operand" "Q,Q"))
+ (const_int 8)
+ (const_int 8)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "@
+ <logic>{b}\t{%h2, %h0|%h0, %h2}
+ #"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (match_op_dup 3
+ [(any_logic (match_dup 4) (match_dup 2))
+ (const_int 8) (const_int 8)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
;; Convert wide AND instructions with immediate operand to shorter QImode
;; equivalents when possible.
@@ -12166,26 +12322,6 @@ (define_insn_and_split "*notxorqi_1"
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
-;; Alternative 1 is needed to work around LRA limitation, see PR82524.
-(define_insn_and_split "*<code><mode>_1_slp"
- [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
- (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
- (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
- "@
- <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
- #"
- "&& reload_completed"
- [(set (strict_low_part (match_dup 0)) (match_dup 1))
- (parallel
- [(set (strict_low_part (match_dup 0))
- (any_or:SWI12 (match_dup 0) (match_dup 2)))
- (clobber (reg:CC FLAGS_REG))])]
- ""
- [(set_attr "type" "alu")
- (set_attr "mode" "<MODE>")])
-
;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
;; This eliminates sign extension after logic operation.
@@ -12276,90 +12412,6 @@ (define_insn "*<code><mode>_3"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*<code>qi_ext<mode>_0"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
- (any_or:QI
- (subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
- (match_operand:QI 1 "nonimmediate_operand" "0")))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "<logic>{b}\t{%h2, %0|%0, %h2}"
- [(set_attr "addr" "gpr8")
- (set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_1"
- [(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
- (const_int 8)
- (const_int 8))
- (subreg:SWI248
- (any_or:QI
- (subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
- (const_int 8)
- (const_int 8)]) 0)
- (match_operand:QI 2 "general_operand" "QnBn")) 0))
- (clobber (reg:CC FLAGS_REG))]
- "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- /* FIXME: without this LRA can't reload this pattern, see PR82524. */
- && rtx_equal_p (operands[0], operands[1])"
- "<logic>{b}\t{%2, %h0|%h0, %2}"
- [(set_attr "addr" "gpr8")
- (set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_2"
- [(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
- (const_int 8)
- (const_int 8))
- (subreg:SWI248
- (any_or:QI
- (subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "%0")
- (const_int 8)
- (const_int 8)]) 0)
- (subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
- (clobber (reg:CC FLAGS_REG))]
- "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- /* FIXME: without this LRA can't reload this pattern, see PR82524. */
- && (rtx_equal_p (operands[0], operands[1])
- || rtx_equal_p (operands[0], operands[2]))"
- "<logic>{b}\t{%h2, %h0|%h0, %h2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_3"
- [(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
- (const_int 8)
- (const_int 8))
- (zero_extract:SWI248
- (any_logic:SWI248
- (match_operand 1 "int248_register_operand" "%0")
- (match_operand 2 "int248_register_operand" "Q"))
- (const_int 8)
- (const_int 8)))
- (clobber (reg:CC FLAGS_REG))]
- "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- /* FIXME: without this LRA can't reload this pattern, see PR82524. */
- && (rtx_equal_p (operands[0], operands[1])
- || rtx_equal_p (operands[0], operands[2]))"
- "<logic>{b}\t{%h2, %h0|%h0, %h2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
;; Convert wide OR instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
@@ -12443,37 +12495,6 @@ (define_expand "xorqi_ext_1_cc"
(const_int 8)) 0)
(match_dup 2)) 0))])])
-(define_insn "*xorqi_ext<mode>_1_cc"
- [(set (reg FLAGS_REG)
- (compare
- (xor:QI
- (subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
- (const_int 8)
- (const_int 8)]) 0)
- (match_operand:QI 2 "general_operand" "QnBn"))
- (const_int 0)))
- (set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
- (const_int 8)
- (const_int 8))
- (subreg:SWI248
- (xor:QI
- (subreg:QI
- (match_op_dup 3
- [(match_dup 1)
- (const_int 8)
- (const_int 8)]) 0)
- (match_dup 2)) 0))]
- "ix86_match_ccmode (insn, CCNOmode)
- /* FIXME: without this LRA can't reload this pattern, see PR82524. */
- && rtx_equal_p (operands[0], operands[1])"
- "xor{b}\t{%2, %h0|%h0, %2}"
- [(set_attr "addr" "gpr8")
- (set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
;; Peephole2 rega = 0; rega op= regb into rega = regb.
(define_peephole2
[(parallel [(set (match_operand:SWI 0 "general_reg_operand")
@@ -12813,7 +12834,8 @@ (define_insn_and_split "*neg<mode>_1_slp"
"@
neg{<imodesuffix>}\t%0
#"
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
@@ -12881,22 +12903,40 @@ (define_expand "x86_neg<mode>_ccc"
(set (match_operand:SWI48 0 "register_operand")
(neg:SWI48 (match_dup 1)))])])
-(define_insn "*negqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*negqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(neg:QI
(subreg:QI
(match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
- "neg{b}\t%h0"
+ ""
+ "@
+ neg{b}\t%h0
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (neg:QI
+ (subreg:QI
+ (match_op_dup 2
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "type" "negnot")
(set_attr "mode" "QI")])
@@ -13420,7 +13460,8 @@ (define_insn_and_split "*one_cmpl<mode>_1_slp"
"@
not{<imodesuffix>}\t%0
#"
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(set (strict_low_part (match_dup 0))
(not:SWI12 (match_dup 0)))]
@@ -13479,6 +13520,40 @@ (define_split
(const_int 0)]))
(set (match_dup 1)
(zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
+
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*one_cmplqi_ext<mode>_1"
+ [(set (zero_extract:SWI248
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
+ (const_int 8)
+ (const_int 8))
+ (subreg:SWI248
+ (not:QI
+ (subreg:QI
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))]
+ ""
+ "@
+ not{b}\t%h0
+ #"
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)))
+ (set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (not:QI
+ (subreg:QI
+ (match_op_dup 2
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
+ ""
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI")])
\f
;; Shift instructions
@@ -14254,7 +14329,8 @@ (define_insn_and_split "*ashl<mode>3_1_slp"
return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
@@ -14458,23 +14534,26 @@ (define_insn "*ashl<mode>3_cconly"
(const_string "*")))
(set_attr "mode" "<MODE>")])
-(define_insn "*ashlqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*ashlqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(ashift:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)
- (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
- (clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
{
+ if (which_alternative)
+ return "#";
+
switch (get_attr_type (insn))
{
case TYPE_ALU:
@@ -14489,6 +14568,22 @@ (define_insn "*ashlqi_ext<mode>_2"
return "sal{b}\t{%2, %h0|%h0, %2}";
}
}
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (ashift:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set (attr "type")
(cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
@@ -15247,7 +15342,8 @@ (define_insn_and_split "*<insn><mode>3_1_slp"
else
return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
@@ -15361,29 +15457,48 @@ (define_insn "*<insn><mode>3_cconly"
(const_string "*")))
(set_attr "mode" "<MODE>")])
-(define_insn "*<insn>qi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<insn>qi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand 0 "int248_register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
(match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0")
+ [(match_operand 1 "int248_register_operand" "0,!Q")
(const_int 8)
(const_int 8)]) 0)
- (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
- (clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
{
+ if (which_alternative)
+ return "#";
+
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
return "<shift>{b}\t%h0";
else
return "<shift>{b}\t{%2, %h0|%h0, %2}";
}
+ "reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
+ (parallel
+ [(set (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8))
+ (subreg:SWI248
+ (any_shiftrt:QI
+ (subreg:QI
+ (match_op_dup 3
+ [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (match_dup 2)) 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
[(set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
@@ -15875,7 +15990,8 @@ (define_insn_and_split "*<insn><mode>3_1_slp"
else
return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- "&& reload_completed"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[1]))"
[(set (strict_low_part (match_dup 0)) (match_dup 1))
(parallel
[(set (strict_low_part (match_dup 0))
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1.c b/gcc/testsuite/gcc.target/i386/pr78904-1.c
index d27d7fd651d..ed5403f8067 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1.c
@@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b)
}
/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{
+ a.val -= b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1a.c b/gcc/testsuite/gcc.target/i386/pr78904-1a.c
index 7746477d745..aa9273eeb64 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1a.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1a.c
@@ -45,3 +45,12 @@ struct S1 test_add (struct S1 a, struct S1 b)
}
/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{
+ a.val -= b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1b.c b/gcc/testsuite/gcc.target/i386/pr78904-1b.c
index 20b677252ab..0687c95e912 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1b.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1b.c
@@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a, struct S1 b)
}
/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{
+ a.val -= b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2.c b/gcc/testsuite/gcc.target/i386/pr78904-2.c
index 0cc4aaa91ea..3e9389ec20d 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2.c
@@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a)
}
/* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{
+ a.val -= t.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2a.c b/gcc/testsuite/gcc.target/i386/pr78904-2a.c
index 41eaa259158..f0c5979c821 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2a.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2a.c
@@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a)
}
/* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{
+ a.val -= t.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2b.c b/gcc/testsuite/gcc.target/i386/pr78904-2b.c
index 23e975ac93e..e6154e6d918 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2b.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2b.c
@@ -48,3 +48,12 @@ struct S1 test_add (struct S1 a)
}
/* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{
+ a.val -= t.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c
index c7bd63c9543..d9979672945 100644
--- a/gcc/testsuite/gcc.target/i386/pr78952-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c
@@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b)
}
/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{
+ a.val -= b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-1.c b/gcc/testsuite/gcc.target/i386/pr82524-1.c
new file mode 100644
index 00000000000..6539630900a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-1.c
@@ -0,0 +1,63 @@
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S
+{
+ char pad1;
+ char val;
+ char pad2;
+ char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, struct S c)
+{
+ a.val = b.val + c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, struct S c)
+{
+ a.val = b.val - c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_and (struct S a, struct S b, struct S c)
+{
+ a.val = b.val & c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, struct S c)
+{
+ a.val = b.val | c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, struct S c)
+{
+ a.val = b.val ^ c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-2.c b/gcc/testsuite/gcc.target/i386/pr82524-2.c
new file mode 100644
index 00000000000..766dd1aae1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-2.c
@@ -0,0 +1,63 @@
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S
+{
+ char pad1;
+ char val;
+ char pad2;
+ char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, char *c)
+{
+ a.val = b.val + *c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, char *c)
+{
+ a.val = b.val - *c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_and (struct S a, struct S b, char *c)
+{
+ a.val = b.val & *c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, char *c)
+{
+ a.val = b.val | *c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, char *c)
+{
+ a.val = b.val ^ *c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-3.c b/gcc/testsuite/gcc.target/i386/pr82524-3.c
new file mode 100644
index 00000000000..7a66712193e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-3.c
@@ -0,0 +1,42 @@
+/* PR target/82524 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "movzbl" } } */
+
+struct S
+{
+ unsigned char pad1;
+ unsigned char val;
+ unsigned short pad2;
+ unsigned int pad3;
+};
+
+struct S
+test_and (struct S a, struct S b, struct S c)
+{
+ a.val = b.val & c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, struct S c)
+{
+ a.val = b.val | c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, struct S c)
+{
+ a.val = b.val ^ c.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524.c b/gcc/testsuite/gcc.target/i386/pr82524.c
new file mode 100644
index 00000000000..058f0a2d14d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524.c
@@ -0,0 +1,83 @@
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S
+{
+ char pad1;
+ char val;
+ char pad2;
+ char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, char c)
+{
+ a.val = b.val + c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, char c)
+{
+ a.val = b.val - c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_neg (struct S a, struct S b)
+{
+ a.val = -b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]negb" } } */
+
+struct S
+test_and (struct S a, struct S b, char c)
+{
+ a.val = b.val & c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, char c)
+{
+ a.val = b.val | c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, char c)
+{
+ a.val = b.val ^ c;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
+
+struct S
+test_not (struct S a, struct S b)
+{
+ a.val = ~b.val;
+
+ return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]notb" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-11-08 20:59 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-08 20:59 [committed] i386: Apply LRA reload workaround to insns with high registers [PR82524] Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).