* [PATCH] i386: Add *setcc_hi_1* define_insn_and_split [PR97950]
@ 2020-11-24 8:20 Jakub Jelinek
2020-11-24 8:57 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2020-11-24 8:20 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
Hi!
As the following testcase shows, unlike the char, int or long long sized
__builtin_*_overflow{,_p}, for the short sized ones the ce1 pass in most cases
doesn't optimize the jo/jno or jc/jnc jumps with setting of a pseudo to 0/1
into seto/setc. The reason is a missing *setcc_hi_1* pattern. The following
patch implements it using mode iterators, so that on i486 and pentium?
one can get the zero extension through an "and" instruction instead of movzbw.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2020-11-24 Jakub Jelinek <jakub@redhat.com>
PR target/97950
* config/i386/i386.md (*setcc_si_1_and): Macroize into...
(*setcc_<mode>_1_and): New define_insn_and_split with SWI24 iterator.
(*setcc_si_1_movzbl): Macroize into...
(*setcc_<mode>_1_movzbl): New define_insn_and_split with SWI24
iterator.
* gcc.target/i386/pr97950.c: New test.
--- gcc/config/i386/i386.md.jj 2020-11-23 17:01:48.235055044 +0100
+++ gcc/config/i386/i386.md 2020-11-23 21:29:43.425842870 +0100
@@ -12714,9 +12714,9 @@ (define_insn_and_split "*setcc_di_1"
operands[2] = gen_lowpart (QImode, operands[0]);
})
-(define_insn_and_split "*setcc_si_1_and"
- [(set (match_operand:SI 0 "register_operand" "=q")
- (match_operator:SI 1 "ix86_comparison_operator"
+(define_insn_and_split "*setcc_<mode>_1_and"
+ [(set (match_operand:SWI24 0 "register_operand" "=q")
+ (match_operator:SWI24 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL
@@ -12724,7 +12724,7 @@ (define_insn_and_split "*setcc_si_1_and"
"#"
"&& reload_completed"
[(set (match_dup 2) (match_dup 1))
- (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
+ (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
operands[1] = shallow_copy_rtx (operands[1]);
@@ -12732,16 +12732,16 @@ (define_insn_and_split "*setcc_si_1_and"
operands[2] = gen_lowpart (QImode, operands[0]);
})
-(define_insn_and_split "*setcc_si_1_movzbl"
- [(set (match_operand:SI 0 "register_operand" "=q")
- (match_operator:SI 1 "ix86_comparison_operator"
+(define_insn_and_split "*setcc_<mode>_1_movzbl"
+ [(set (match_operand:SWI24 0 "register_operand" "=q")
+ (match_operator:SWI24 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))]
"!TARGET_PARTIAL_REG_STALL
&& (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
"#"
"&& reload_completed"
[(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+ (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
{
operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], QImode);
--- gcc/testsuite/gcc.target/i386/pr97950.c.jj 2020-11-23 17:20:33.481605139 +0100
+++ gcc/testsuite/gcc.target/i386/pr97950.c 2020-11-23 21:32:53.593734242 +0100
@@ -0,0 +1,153 @@
+/* PR target/97950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */
+/* { dg-final { scan-assembler-times "\tseto\t" 16 } } */
+/* { dg-final { scan-assembler-times "\tsetc\t" 4 } } */
+/* { dg-final { scan-assembler-not "\tjn?a\t" } } */
+/* { dg-final { scan-assembler-not "\tjn?o\t" } } */
+/* { dg-final { scan-assembler-not "\tjn?c\t" } } */
+
+char
+f1 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+char
+f2 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+char
+f3 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+char
+f4 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f5 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f6 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f7 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+char
+f8 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+char
+f9 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+char
+f10 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f11 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f12 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f13 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f14 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f15 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f16 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f17 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f18 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f19 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f20 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f21 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f22 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f23 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f24 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] i386: Add *setcc_hi_1* define_insn_and_split [PR97950]
2020-11-24 8:20 [PATCH] i386: Add *setcc_hi_1* define_insn_and_split [PR97950] Jakub Jelinek
@ 2020-11-24 8:57 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2020-11-24 8:57 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Tue, Nov 24, 2020 at 9:20 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As the following testcase shows, unlike char, int or long long sized
> __builtin_*_overflow{,_p}, for short sized one in most cases the ce1 pass
> doesn't optimize the jo/jno or jc/jnc jumps with setting of a pseudo to 0/1
> into seto/setc. The reason is missing *setcc_hi_1* pattern. The following
> patch implements it using mode iterators so that on i486 and pentium?
> one can get the zero extension through and instead of movzbw.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-11-24 Jakub Jelinek <jakub@redhat.com>
>
> PR target/97950
> * config/i386/i386.md (*setcc_si_1_and): Macroize into...
> (*setcc_<mode>_1_and): New define_insn_and_split with SWI24 iterator.
> (*setcc_si_1_movzbl): Macroize into...
> (*setcc_<mode>_1_movzbl): New define_insn_and_split with SWI24
> iterator.
>
> * gcc.target/i386/pr97950.c: New test.
OK.
Thanks,
Uros.
>
> --- gcc/config/i386/i386.md.jj 2020-11-23 17:01:48.235055044 +0100
> +++ gcc/config/i386/i386.md 2020-11-23 21:29:43.425842870 +0100
> @@ -12714,9 +12714,9 @@ (define_insn_and_split "*setcc_di_1"
> operands[2] = gen_lowpart (QImode, operands[0]);
> })
>
> -(define_insn_and_split "*setcc_si_1_and"
> - [(set (match_operand:SI 0 "register_operand" "=q")
> - (match_operator:SI 1 "ix86_comparison_operator"
> +(define_insn_and_split "*setcc_<mode>_1_and"
> + [(set (match_operand:SWI24 0 "register_operand" "=q")
> + (match_operator:SWI24 1 "ix86_comparison_operator"
> [(reg FLAGS_REG) (const_int 0)]))
> (clobber (reg:CC FLAGS_REG))]
> "!TARGET_PARTIAL_REG_STALL
> @@ -12724,7 +12724,7 @@ (define_insn_and_split "*setcc_si_1_and"
> "#"
> "&& reload_completed"
> [(set (match_dup 2) (match_dup 1))
> - (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
> + (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
> (clobber (reg:CC FLAGS_REG))])]
> {
> operands[1] = shallow_copy_rtx (operands[1]);
> @@ -12732,16 +12732,16 @@ (define_insn_and_split "*setcc_si_1_and"
> operands[2] = gen_lowpart (QImode, operands[0]);
> })
>
> -(define_insn_and_split "*setcc_si_1_movzbl"
> - [(set (match_operand:SI 0 "register_operand" "=q")
> - (match_operator:SI 1 "ix86_comparison_operator"
> +(define_insn_and_split "*setcc_<mode>_1_movzbl"
> + [(set (match_operand:SWI24 0 "register_operand" "=q")
> + (match_operator:SWI24 1 "ix86_comparison_operator"
> [(reg FLAGS_REG) (const_int 0)]))]
> "!TARGET_PARTIAL_REG_STALL
> && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
> "#"
> "&& reload_completed"
> [(set (match_dup 2) (match_dup 1))
> - (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
> + (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
> {
> operands[1] = shallow_copy_rtx (operands[1]);
> PUT_MODE (operands[1], QImode);
> --- gcc/testsuite/gcc.target/i386/pr97950.c.jj 2020-11-23 17:20:33.481605139 +0100
> +++ gcc/testsuite/gcc.target/i386/pr97950.c 2020-11-23 21:32:53.593734242 +0100
> @@ -0,0 +1,153 @@
> +/* PR target/95950 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=generic" } */
> +/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */
> +/* { dg-final { scan-assembler-times "\tseto\t" 16 } } */
> +/* { dg-final { scan-assembler-times "\tsetc\t" 4 } } */
> +/* { dg-final { scan-assembler-not "\tjn?a\t" } } */
> +/* { dg-final { scan-assembler-not "\tjn?o\t" } } */
> +/* { dg-final { scan-assembler-not "\tjn?c\t" } } */
> +
> +char
> +f1 (short a, short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f2 (short a, short b)
> +{
> + return __builtin_add_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f3 (short a, short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f4 (unsigned short a, unsigned short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +char
> +f5 (unsigned short a, unsigned short b)
> +{
> + return __builtin_add_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +char
> +f6 (unsigned short a, unsigned short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +char
> +f7 (short a, short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f8 (short a, short b)
> +{
> + return __builtin_add_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f9 (short a, short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (short) 0);
> +}
> +
> +char
> +f10 (unsigned short a, unsigned short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +char
> +f11 (unsigned short a, unsigned short b)
> +{
> + return __builtin_add_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +char
> +f12 (unsigned short a, unsigned short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f13 (short a, short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f14 (short a, short b)
> +{
> + return __builtin_add_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f15 (short a, short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f16 (unsigned short a, unsigned short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f17 (unsigned short a, unsigned short b)
> +{
> + return __builtin_add_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f18 (unsigned short a, unsigned short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f19 (short a, short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f20 (short a, short b)
> +{
> + return __builtin_add_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f21 (short a, short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (short) 0);
> +}
> +
> +unsigned short
> +f22 (unsigned short a, unsigned short b)
> +{
> + return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f23 (unsigned short a, unsigned short b)
> +{
> + return __builtin_add_overflow_p (a, b, (unsigned short) 0);
> +}
> +
> +unsigned short
> +f24 (unsigned short a, unsigned short b)
> +{
> + return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
> +}
>
> Jakub
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-11-24 8:57 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-24 8:20 [PATCH] i386: Add *setcc_hi_1* define_insn_and_split [PR97950] Jakub Jelinek
2020-11-24 8:57 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).