* [PATCH] i386: Emit compares between high registers and memory
@ 2023-04-19 15:06 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-04-19 15:06 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1497 bytes --]
The following code:
typedef __SIZE_TYPE__ size_t;
struct S1s
{
char pad1;
char val;
short pad2;
};
extern char ts[256];
_Bool foo (struct S1s a, size_t i)
{
return (ts[i] > a.val);
}
compiles with -O2 to:
movl %edi, %eax
movsbl %ah, %edi
cmpb %dil, ts(%rsi)
setg %al
ret
However, the compare could use the high register %ah directly instead of %dil:
movl %edi, %eax
cmpb ts(%rsi), %ah
setl %al
ret
Use any_extract code iterator to handle signed and unsigned extracts
from high register and introduce peephole2 patterns to propagate
norex memory operand into the compare insn.
gcc/ChangeLog:
PR target/78904
PR target/78952
* config/i386/i386.md (*cmpqi_ext<mode>_1_mem_rex64): New insn pattern.
(*cmpqi_ext<mode>_1): Use nonimmediate_operand predicate
for operand 0. Use any_extract code iterator.
(*cmpqi_ext<mode>_1 peephole2): New peephole2 pattern.
(*cmpqi_ext<mode>_2): Use any_extract code iterator.
(*cmpqi_ext<mode>_3_mem_rex64): New insn pattern.
(*cmpqi_ext<mode>_3): Use general_operand predicate
for operand 1. Use any_extract code iterator.
(*cmpqi_ext<mode>_3 peephole2): New peephole2 pattern.
(*cmpqi_ext<mode>_4): Use any_extract code iterator.
gcc/testsuite/ChangeLog:
PR target/78904
PR target/78952
* gcc.target/i386/pr78952-3.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5889 bytes --]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1419ea4cff3..0f95d8e8918 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1005,6 +1005,9 @@ (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
;; Mapping of extend operators
(define_code_iterator any_extend [sign_extend zero_extend])
+;; Mapping of extract operators
+(define_code_iterator any_extract [sign_extract zero_extract])
+
;; Mapping of highpart multiply operators
(define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
@@ -1454,12 +1457,27 @@ (define_insn "*cmp<mode>_minus_1"
[(set_attr "type" "icmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*cmpqi_ext<mode>_1_mem_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "norex_memory_operand" "Bn")
+ (subreg:QI
+ (any_extract:SWI248
+ (match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "TARGET_64BIT && reload_completed
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
(define_insn "*cmpqi_ext<mode>_1"
[(set (reg FLAGS_REG)
(compare
- (match_operand:QI 0 "nonimm_x64constmem_operand" "QBc,m")
+ (match_operand:QI 0 "nonimmediate_operand" "QBc,m")
(subreg:QI
- (zero_extract:SWI248
+ (any_extract:SWI248
(match_operand 1 "int248_register_operand" "Q,Q")
(const_int 8)
(const_int 8)) 0)))]
@@ -1469,11 +1487,33 @@ (define_insn "*cmpqi_ext<mode>_1"
(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operand:QI 1 "norex_memory_operand"))
+ (set (match_operand 3 "flags_reg_operand")
+ (match_operator 4 "compare_operator"
+ [(match_dup 0)
+ (subreg:QI
+ (any_extract:SWI248
+ (match_operand 2 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)) 0)]))]
+ "TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (match_op_dup 4
+ [(match_dup 1)
+ (subreg:QI
+ (any_extract:SWI248
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)]))])
+
(define_insn "*cmpqi_ext<mode>_2"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
+ (any_extract:SWI248
(match_operand 0 "int248_register_operand" "Q")
(const_int 8)
(const_int 8)) 0)
@@ -1494,31 +1534,68 @@ (define_expand "cmpqi_ext_3"
(const_int 8)) 0)
(match_operand:QI 1 "const_int_operand")))])
+(define_insn "*cmpqi_ext<mode>_3_mem_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (any_extract:SWI248
+ (match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "norex_memory_operand" "Bn")))]
+ "TARGET_64BIT && reload_completed
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
(define_insn "*cmpqi_ext<mode>_3"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
+ (any_extract:SWI248
(match_operand 0 "int248_register_operand" "Q,Q")
(const_int 8)
(const_int 8)) 0)
- (match_operand:QI 1 "general_x64constmem_operand" "QnBc,m")))]
+ (match_operand:QI 1 "general_operand" "QnBc,m")))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%1, %h0|%h0, %1}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operand:QI 1 "norex_memory_operand"))
+ (set (match_operand 3 "flags_reg_operand")
+ (match_operator 4 "compare_operator"
+ [(subreg:QI
+ (any_extract:SWI248
+ (match_operand 2 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_dup 0)]))]
+ "TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (match_op_dup 4
+ [(subreg:QI
+ (any_extract:SWI248
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_dup 1)]))])
+
(define_insn "*cmpqi_ext<mode>_4"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
+ (any_extract:SWI248
(match_operand 0 "int248_register_operand" "Q")
(const_int 8)
(const_int 8)) 0)
(subreg:QI
- (zero_extract:SWI248
+ (any_extract:SWI248
(match_operand 1 "int248_register_operand" "Q")
(const_int 8)
(const_int 8)) 0)))]
@@ -3374,9 +3451,6 @@ (define_peephole2
operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
})
-
-(define_code_iterator any_extract [sign_extract zero_extract])
-
(define_insn "*insvqi_2"
[(set (zero_extract:SWI248
(match_operand 0 "int248_register_operand" "+Q")
diff --git a/gcc/testsuite/gcc.target/i386/pr78952-3.c b/gcc/testsuite/gcc.target/i386/pr78952-3.c
new file mode 100644
index 00000000000..ab00c55b370
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr78952-3.c
@@ -0,0 +1,40 @@
+/* PR target/78952 */
+/* { dg-do compile } */
+/* { dg-require-effective-target nonpic } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=1" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz]bl" } } */
+
+typedef __SIZE_TYPE__ size_t;
+
+struct S1s
+{
+ char pad1;
+ char val;
+ short pad2;
+};
+
+extern char ts[256];
+
+_Bool foo (struct S1s a, size_t i)
+{
+ return (ts[i] > a.val);
+}
+
+/* { dg-final { scan-assembler "cmpb\[ \\t]+ts\[^\n]*%.h" } } */
+
+struct S1u
+{
+ unsigned char pad1;
+ unsigned char val;
+ unsigned short pad2;
+};
+
+extern unsigned char tu[256];
+
+_Bool bar (struct S1u a, size_t i)
+{
+ return (tu[i] > a.val);
+}
+
+/* { dg-final { scan-assembler "cmpb\[ \\t]+tu\[^\n]*%.h" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-04-19 15:06 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-19 15:06 [PATCH] i386: Emit compares between high registers and memory Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).