* [PATCH, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr
@ 2012-07-27 9:35 Uros Bizjak
2012-07-27 17:29 ` [PATCH v2, " Uros Bizjak
0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2012-07-27 9:35 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1356 bytes --]
Hello!
Attached patch enables ix86_avoid_lea_for_addr to process
zero-extended addresses. This patch should help atom performance,
especially in x32 mode.
Please note the complication with insn re-recognition in
ix86_avoid_lea_for_addr, to solve the problem as described in the
comment:
/* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
array behind our backs. To make things worse, zero-extended operands
(zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
also satisfy operand constraints of one of many *lea<mode> insn patterns.
However, at this point we are looking only if the original insn
is performing inherent zero extension, and will emit
split insn sequence in SImode for this case. */
2012-07-27 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
zero-extended addresses.
(ix86_split_lea_for_addr): Unconditionally convert target and
all address operands to requested mode.
* config/i386/i386.md (*lea<mode>): Determine mode of split insn
sequence from the original insn pattern.
Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}, also when configured with "--with-arch=core2 --with-cpu=atom"
I will wait a day or two for possible comments, before the patch is
committed to mainline SVN.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 3939 bytes --]
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 189904)
+++ config/i386/i386.md (working copy)
@@ -3474,13 +3474,28 @@
(match_operand:SI 1 "x86_64_zext_general_operand"
"rmWz,0,r ,m ,r ,m")))]
"TARGET_64BIT"
- "@
- mov{l}\t{%1, %k0|%k0, %1}
- #
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ if (ix86_use_lea_for_mov (insn, operands))
+ return "lea{l}\t{%E1, %k0|%k0, %E1}";
+ else
+ return "mov{l}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_MMXMOV:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
(set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
(set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,26 @@
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
[(const_int 0)]
{
- ix86_split_lea_for_addr (operands, <MODE>mode);
+ enum machine_mode mode = <MODE>mode;
+ rtx addr;
+
+ /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
+ array behind our backs. To make things worse, zero-extended oeprands
+ (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
+ also satisfy operand constraints of one of many *lea<mode> insn patterns.
+
+ However, at this point we are looking only if the original insn
+ is performing inherent zero extension, and will emit
+ split insn sequence in SImode for this case. */
+ addr = SET_SRC (PATTERN (curr_insn));
+
+ /* Emit all operations in SImode for zero-extended addresses. Recall
+ that x86_64 inheretly zero-extends SImode operations to DImode. */
+ if (GET_CODE (addr) == ZERO_EXTEND
+ || GET_CODE (addr) == AND)
+ mode = SImode;
+
+ ix86_split_lea_for_addr (operands, mode);
DONE;
}
[(set_attr "type" "lea")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 189904)
+++ config/i386/i386.c (working copy)
@@ -17036,11 +17036,6 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
struct ix86_address parts;
int ok;
- /* FIXME: Handle zero-extended addresses. */
- if (GET_CODE (operands[1]) == ZERO_EXTEND
- || GET_CODE (operands[1]) == AND)
- return false;
-
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
@@ -17124,7 +17119,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_
It is assumed that it is allowed to clobber flags register
at lea position. */
-extern void
+void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
unsigned int regno0, regno1, regno2;
@@ -17135,7 +17130,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
- target = operands[0];
+ target = gen_lowpart (mode, operands[0]);
regno0 = true_regnum (target);
regno1 = INVALID_REGNUM;
@@ -17143,18 +17138,19 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
if (parts.base)
{
- if (GET_MODE (parts.base) != mode)
- parts.base = gen_lowpart (mode, parts.base);
+ parts.base = gen_lowpart (mode, parts.base);
regno1 = true_regnum (parts.base);
}
if (parts.index)
{
- if (GET_MODE (parts.index) != mode)
- parts.index = gen_lowpart (mode, parts.index);
+ parts.index = gen_lowpart (mode, parts.index);
regno2 = true_regnum (parts.index);
}
+ if (parts.disp)
+ parts.disp = gen_lowpart (mode, parts.disp);
+
if (parts.scale > 1)
{
/* Case r1 = r1 + ... */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr
2012-07-27 9:35 [PATCH, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr Uros Bizjak
@ 2012-07-27 17:29 ` Uros Bizjak
2012-07-27 18:14 ` Uros Bizjak
2012-07-28 9:26 ` [PATCH v3, " Uros Bizjak
0 siblings, 2 replies; 4+ messages in thread
From: Uros Bizjak @ 2012-07-27 17:29 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2187 bytes --]
On Fri, Jul 27, 2012 at 11:29 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Attached patch enables ix86_avoid_lea_for_addr to process
> zero-extended addresses. This patch should help atom performance,
> especially in x32 mode.
>
> Please note the complication with insn re-recognition in
> ix86_avoid_lea_for_addr, to solve the problem as described in the
> comment:
>
> /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
> array behind our backs. To make things worse, zero-extended oeprands
> (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
> also satisfy operand constraints of one of many *lea<mode> insn patterns.
Actually, the instruction gets re-recognized as
*zero_extendsidi2_rex64, this is the reason why we got DImode
(addr:DI) operand. This fact further uncovers an existing problem with
ix86_avoid_lea_for_addr. This function should not mark addresses
having fewer than two components for splitting. These patterns are
re-recognized as MOV (and now as zero-extending MOVL) due to the
approach, described in the comment above, and due to the fact that we
define *mov{si,di} and *zero_extendsidi2_rex64 patterns before
*lea<mode> in the i386.md.
However, there is no point messing with these patterns in splitters;
they are conditionally converted to LEAs at the insn emission phase
(see e.g. the *zero_extendsidi2_rex64 change in the attached patch). The
attached patch prevents splitting by a simple criterion function.
As a bonus, the patch also includes conditional splitter for
non-destructive zero-extended adds.
2012-07-27 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
zero-extended addresses. Return false if the address has less
than two components.
(ix86_split_lea_for_addr): Unconditionally convert target and
all address operands to requested mode.
* config/i386/i386.md (*lea<mode>): Pass SImode to
ix86_split_lea_for_addr when splitting zero-extended address.
(zero-extended add splitter): New splitter to conditionally split
non-destructive adds.
(*zero_extendsidi2_rex64): Conditionally emit leal instead of movl.
I am currently re-testing v2 patch.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5385 bytes --]
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 189915)
+++ config/i386/i386.c (working copy)
@@ -17036,11 +17036,6 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
struct ix86_address parts;
int ok;
- /* FIXME: Handle zero-extended addresses. */
- if (GET_CODE (operands[1]) == ZERO_EXTEND
- || GET_CODE (operands[1]) == AND)
- return false;
-
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
@@ -17052,6 +17047,11 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
+ /* There should be at least two components in the address. */
+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+ return false;
+
/* We should not split into add if non legitimate pic
operand is used as displacement. */
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
@@ -17124,7 +17124,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_
It is assumed that it is allowed to clobber flags register
at lea position. */
-extern void
+void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
unsigned int regno0, regno1, regno2;
@@ -17135,7 +17135,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
- target = operands[0];
+ target = gen_lowpart (mode, operands[0]);
regno0 = true_regnum (target);
regno1 = INVALID_REGNUM;
@@ -17143,18 +17143,19 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
if (parts.base)
{
- if (GET_MODE (parts.base) != mode)
- parts.base = gen_lowpart (mode, parts.base);
+ parts.base = gen_lowpart (mode, parts.base);
regno1 = true_regnum (parts.base);
}
if (parts.index)
{
- if (GET_MODE (parts.index) != mode)
- parts.index = gen_lowpart (mode, parts.index);
+ parts.index = gen_lowpart (mode, parts.index);
regno2 = true_regnum (parts.index);
}
+ if (parts.disp)
+ parts.disp = gen_lowpart (mode, parts.disp);
+
if (parts.scale > 1)
{
/* Case r1 = r1 + ... */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 189915)
+++ config/i386/i386.md (working copy)
@@ -3474,13 +3474,28 @@
(match_operand:SI 1 "x86_64_zext_general_operand"
"rmWz,0,r ,m ,r ,m")))]
"TARGET_64BIT"
- "@
- mov{l}\t{%1, %k0|%k0, %1}
- #
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ if (ix86_use_lea_for_mov (insn, operands))
+ return "lea{l}\t{%E1, %k0|%k0, %E1}";
+ else
+ return "mov{l}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_MMXMOV:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
(set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
(set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,16 @@
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
[(const_int 0)]
{
- ix86_split_lea_for_addr (operands, <MODE>mode);
+ enum machine_mode mode = <MODE>mode;
+ rtx addr = operands[1];
+
+ /* Emit all operations in SImode for zero-extended addresses. Recall
+ that x86_64 inheretly zero-extends SImode operations to DImode. */
+ if (GET_CODE (addr) == ZERO_EXTEND
+ || GET_CODE (addr) == AND)
+ mode = SImode;
+
+ ix86_split_lea_for_addr (operands, mode);
DONE;
}
[(set_attr "type" "lea")
@@ -5807,11 +5831,11 @@
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(plus:SWI48 (match_operand:SWI48 1 "register_operand")
- (match_operand:SWI48 2 "nonmemory_operand")))
+ (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed && ix86_avoid_lea_for_add (insn, operands)"
[(set (match_dup 0) (match_dup 1))
- (parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2)))
+ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
;; Convert add to the lea pattern to avoid flags dependency.
@@ -5840,6 +5864,21 @@
DONE;
})
+;; Split non destructive adds if we cannot use lea.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "x86_64_nonmemory_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[3] = gen_lowpart (SImode, operands[0]);")
+
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand:DI 0 "register_operand")
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr
2012-07-27 17:29 ` [PATCH v2, " Uros Bizjak
@ 2012-07-27 18:14 ` Uros Bizjak
2012-07-28 9:26 ` [PATCH v3, " Uros Bizjak
1 sibling, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2012-07-27 18:14 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2445 bytes --]
On Fri, Jul 27, 2012 at 7:16 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Jul 27, 2012 at 11:29 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>
>> Attached patch enables ix86_avoid_lea_for_addr to process
>> zero-extended addresses. This patch should help atom performance,
>> especially in x32 mode.
>>
>> Please note the complication with insn re-recognition in
>> ix86_avoid_lea_for_addr, to solve the problem as described in the
>> comment:
>>
>> /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
>> array behind our backs. To make things worse, zero-extended oeprands
>> (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
>> also satisfy operand constraints of one of many *lea<mode> insn patterns.
>
> Actually, the instruction gets re-recognized as
> *zero_extendsidi2_rex64, this is the reason why we got DImode
> (addr:DI) operand. This fact further uncovers existing problem with
> ix86_avoid_lea_for_addr. This function should not mark addresses
> having less than two operands for splitting. These patterns are
> re-recognized as MOV (and now as zero-extending MOVL) due to the
> approach, described in the comment above, and due to the fact that we
> define *mov{si,di} and *zero_extendsidi2_rex64 patterns before
> *lea<mode> in the i386.md.
>
> However, here is no point messing with these patterns in splitters,
> they are conditionally converted to LEAs at the insn emission phase
> (see i.e. *zero_extendsidi2_rex64 change in attached patch). The
> attached patch prevents splitting by a simple criteria function.
>
> As a bonus, the patch also includes conditional splitter for
> non-destructive zero-extended adds.
>
> 2012-07-27 Uros Bizjak <ubizjak@gmail.com>
>
> * config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
> zero-extended addresses. Return false if the address has less
> than two components.
> (ix86_split_lea_for_addr): Unconditionally convert target and
> all address operands to requested mode.
> * config/i386/i386.md (*lea<mode>): Pass SImode to
> ix86_split_lea_for_addr when splitting zero-extended address.
> (zero-extended add splitter): New splitter to conditionally split
> non-destructive adds.
> (*zero_extendsidi2_rex64): Conditionally emit leal instead of movl.
>
> I am currently re-testing v2 patch.
... now with correct v2 patch attached.
Uros.
[-- Attachment #2: p2.diff.txt --]
[-- Type: text/plain, Size: 5885 bytes --]
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 189915)
+++ config/i386/i386.md (working copy)
@@ -3474,13 +3474,28 @@
(match_operand:SI 1 "x86_64_zext_general_operand"
"rmWz,0,r ,m ,r ,m")))]
"TARGET_64BIT"
- "@
- mov{l}\t{%1, %k0|%k0, %1}
- #
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ if (ix86_use_lea_for_mov (insn, operands))
+ return "lea{l}\t{%E1, %k0|%k0, %E1}";
+ else
+ return "mov{l}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_MMXMOV:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
(set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
(set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,26 @@
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
[(const_int 0)]
{
- ix86_split_lea_for_addr (operands, <MODE>mode);
+ enum machine_mode mode = <MODE>mode;
+ rtx addr;
+
+ /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
+ array behind our backs. To make things worse, zero-extended oeprands
+ (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
+ also satisfy operand constraints of other insn patterns.
+
+ However, at this point we are looking only if the original insn
+ is performing inherent zero extension, and will emit
+ split insn sequence in SImode for this case. */
+ addr = SET_SRC (PATTERN (curr_insn));
+
+ /* Emit all operations in SImode for zero-extended addresses. Recall
+ that x86_64 inheretly zero-extends SImode operations to DImode. */
+ if (GET_CODE (addr) == ZERO_EXTEND
+ || GET_CODE (addr) == AND)
+ mode = SImode;
+
+ ix86_split_lea_for_addr (operands, mode);
DONE;
}
[(set_attr "type" "lea")
@@ -5807,11 +5841,11 @@
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(plus:SWI48 (match_operand:SWI48 1 "register_operand")
- (match_operand:SWI48 2 "nonmemory_operand")))
+ (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed && ix86_avoid_lea_for_add (insn, operands)"
[(set (match_dup 0) (match_dup 1))
- (parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2)))
+ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
;; Convert add to the lea pattern to avoid flags dependency.
@@ -5840,6 +5874,21 @@
DONE;
})
+;; Split non destructive adds if we cannot use lea.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "x86_64_nonmemory_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[3] = gen_lowpart (SImode, operands[0]);")
+
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand:DI 0 "register_operand")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 189915)
+++ config/i386/i386.c (working copy)
@@ -17036,11 +17036,6 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
struct ix86_address parts;
int ok;
- /* FIXME: Handle zero-extended addresses. */
- if (GET_CODE (operands[1]) == ZERO_EXTEND
- || GET_CODE (operands[1]) == AND)
- return false;
-
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
@@ -17052,6 +17047,11 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
+ /* There should be at least two components in the address. */
+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+ return false;
+
/* We should not split into add if non legitimate pic
operand is used as displacement. */
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
@@ -17124,7 +17124,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_
It is assumed that it is allowed to clobber flags register
at lea position. */
-extern void
+void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
unsigned int regno0, regno1, regno2;
@@ -17135,7 +17135,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
- target = operands[0];
+ target = gen_lowpart (mode, operands[0]);
regno0 = true_regnum (target);
regno1 = INVALID_REGNUM;
@@ -17143,18 +17143,19 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
if (parts.base)
{
- if (GET_MODE (parts.base) != mode)
- parts.base = gen_lowpart (mode, parts.base);
+ parts.base = gen_lowpart (mode, parts.base);
regno1 = true_regnum (parts.base);
}
if (parts.index)
{
- if (GET_MODE (parts.index) != mode)
- parts.index = gen_lowpart (mode, parts.index);
+ parts.index = gen_lowpart (mode, parts.index);
regno2 = true_regnum (parts.index);
}
+ if (parts.disp)
+ parts.disp = gen_lowpart (mode, parts.disp);
+
if (parts.scale > 1)
{
/* Case r1 = r1 + ... */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v3, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr
2012-07-27 17:29 ` [PATCH v2, " Uros Bizjak
2012-07-27 18:14 ` Uros Bizjak
@ 2012-07-28 9:26 ` Uros Bizjak
1 sibling, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2012-07-28 9:26 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2276 bytes --]
On Fri, Jul 27, 2012 at 7:16 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> Attached patch enables ix86_avoid_lea_for_addr to process
>> zero-extended addresses. This patch should help atom performance,
>> especially in x32 mode.
>>
>> Please note the complication with insn re-recognition in
>> ix86_avoid_lea_for_addr, to solve the problem as described in the
>> comment:
>>
>> /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
>> array behind our backs. To make things worse, zero-extended oeprands
>> (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
>> also satisfy operand constraints of one of many *lea<mode> insn patterns.
>
> Actually, the instruction gets re-recognized as
> *zero_extendsidi2_rex64, this is the reason why we got DImode
> (addr:DI) operand. This fact further uncovers existing problem with
> ix86_avoid_lea_for_addr. This function should not mark addresses
> having less than two operands for splitting. These patterns are
> re-recognized as MOV (and now as zero-extending MOVL) due to the
> approach, described in the comment above, and due to the fact that we
> define *mov{si,di} and *zero_extendsidi2_rex64 patterns before
> *lea<mode> in the i386.md.
>
> However, here is no point messing with these patterns in splitters,
> they are conditionally converted to LEAs at the insn emission phase
> (see i.e. *zero_extendsidi2_rex64 change in attached patch). The
> attached patch prevents splitting by a simple criteria function.
>
> As a bonus, the patch also includes conditional splitter for
> non-destructive zero-extended adds.
Here is what I have committed to mainline SVN.
2012-07-27 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
zero-extended addresses. Return false if the address has less
than two components.
(ix86_split_lea_for_addr): Unconditionally convert target and
all address operands to requested mode.
* config/i386/i386.md (*lea<mode>): Recover operands from curr_insn.
Pass SImode to ix86_split_lea_for_addr when splitting zero-extended
address.
(zero-extended add splitter): New splitter to conditionally split
non-destructive adds.
(*zero_extendsidi2_rex64): Conditionally emit leal instead of movl.
Uros.
[-- Attachment #2: p3.diff.txt --]
[-- Type: text/plain, Size: 5589 bytes --]
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 189915)
+++ config/i386/i386.c (working copy)
@@ -17036,11 +17036,6 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
struct ix86_address parts;
int ok;
- /* FIXME: Handle zero-extended addresses. */
- if (GET_CODE (operands[1]) == ZERO_EXTEND
- || GET_CODE (operands[1]) == AND)
- return false;
-
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
@@ -17052,6 +17047,11 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
+ /* There should be at least two components in the address. */
+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+ return false;
+
/* We should not split into add if non legitimate pic
operand is used as displacement. */
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
@@ -17124,7 +17124,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_
It is assumed that it is allowed to clobber flags register
at lea position. */
-extern void
+void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
unsigned int regno0, regno1, regno2;
@@ -17135,7 +17135,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
- target = operands[0];
+ target = gen_lowpart (mode, operands[0]);
regno0 = true_regnum (target);
regno1 = INVALID_REGNUM;
@@ -17143,18 +17143,19 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
if (parts.base)
{
- if (GET_MODE (parts.base) != mode)
- parts.base = gen_lowpart (mode, parts.base);
+ parts.base = gen_lowpart (mode, parts.base);
regno1 = true_regnum (parts.base);
}
if (parts.index)
{
- if (GET_MODE (parts.index) != mode)
- parts.index = gen_lowpart (mode, parts.index);
+ parts.index = gen_lowpart (mode, parts.index);
regno2 = true_regnum (parts.index);
}
+ if (parts.disp)
+ parts.disp = gen_lowpart (mode, parts.disp);
+
if (parts.scale > 1)
{
/* Case r1 = r1 + ... */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 189915)
+++ config/i386/i386.md (working copy)
@@ -3474,13 +3474,28 @@
(match_operand:SI 1 "x86_64_zext_general_operand"
"rmWz,0,r ,m ,r ,m")))]
"TARGET_64BIT"
- "@
- mov{l}\t{%1, %k0|%k0, %1}
- #
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}
- %vmovd\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ if (ix86_use_lea_for_mov (insn, operands))
+ return "lea{l}\t{%E1, %k0|%k0, %E1}";
+ else
+ return "mov{l}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_MMXMOV:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
(set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
(set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,23 @@
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
[(const_int 0)]
{
- ix86_split_lea_for_addr (operands, <MODE>mode);
+ enum machine_mode mode = <MODE>mode;
+ rtx pat;
+
+ /* ix86_avoid_lea_for_addr re-recognizes insn and may
+ change operands[] array behind our back. */
+ pat = PATTERN (curr_insn);
+
+ operands[0] = SET_DEST (pat);
+ operands[1] = SET_SRC (pat);
+
+ /* Emit all operations in SImode for zero-extended addresses. Recall
+ that x86_64 inheretly zero-extends SImode operations to DImode. */
+ if (GET_CODE (operands[1]) == ZERO_EXTEND
+ || GET_CODE (operands[1]) == AND)
+ mode = SImode;
+
+ ix86_split_lea_for_addr (operands, mode);
DONE;
}
[(set_attr "type" "lea")
@@ -5807,11 +5838,11 @@
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(plus:SWI48 (match_operand:SWI48 1 "register_operand")
- (match_operand:SWI48 2 "nonmemory_operand")))
+ (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed && ix86_avoid_lea_for_add (insn, operands)"
[(set (match_dup 0) (match_dup 1))
- (parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2)))
+ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
;; Convert add to the lea pattern to avoid flags dependency.
@@ -5840,6 +5871,21 @@
DONE;
})
+;; Split non destructive adds if we cannot use lea.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "x86_64_nonmemory_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[3] = gen_lowpart (SImode, operands[0]);")
+
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand:DI 0 "register_operand")
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2012-07-28 9:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-27 9:35 [PATCH, i386]: Handle zero extended addresses in ix86_avoid_lea_for_addr Uros Bizjak
2012-07-27 17:29 ` [PATCH v2, " Uros Bizjak
2012-07-27 18:14 ` Uros Bizjak
2012-07-28 9:26 ` [PATCH v3, " Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).