* RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
@ 2010-06-25 23:29 H.J. Lu
2010-06-28 10:44 ` Eric Botcazou
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2010-06-25 23:29 UTC (permalink / raw)
To: gcc-patches
Hi,
x86 backend has special optimization for accessing
(zero_extract:SI (reg:M N) (const_int 8) (const_int 8))
However, the combiner never exposes this to the x86 backend. I added
a TARGET_EXPAND_COMPOUND_OPERATION hook to allow the x86 backend to
optimize it. For
---
typedef struct
{
unsigned char c1;
unsigned char c2;
unsigned char c3;
unsigned char c4;
} foo_t;
int
foo (foo_t x)
{
return x.c2 > 4;
}
---
it generates:
movl %edi, %eax
cmpb $4, %ah
seta %al
movzbl %al, %eax
ret
instead of
movl %edi, %eax
movzbl %ah, %edi
xorl %eax, %eax
cmpb $4, %dil
seta %al
ret
Any comments?
Thanks.
H.J.
---
diff --git a/gcc/combine.c b/gcc/combine.c
index 1bee2c7..34f4f76 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -396,7 +396,6 @@ static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
static rtx simplify_if_then_else (rtx);
static rtx simplify_set (rtx);
static rtx simplify_logical (rtx);
-static rtx expand_compound_operation (rtx);
static const_rtx expand_field_assignment (const_rtx);
static rtx make_extraction (enum machine_mode, rtx, HOST_WIDE_INT,
rtx, unsigned HOST_WIDE_INT, int, int, int);
@@ -5085,7 +5084,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
break;
case NEG:
- temp = expand_compound_operation (XEXP (x, 0));
+ temp = targetm.expand_compound_operation (XEXP (x, 0));
/* For C equal to the width of MODE minus 1, (neg (ashiftrt X C)) can be
replaced by (lshiftrt X C). This will convert
@@ -5322,7 +5321,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& mode == GET_MODE (op0)
&& nonzero_bits (op0, mode) == 1)
return gen_lowpart (mode,
- expand_compound_operation (op0));
+ targetm.expand_compound_operation (op0));
else if (STORE_FLAG_VALUE == 1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
@@ -5331,7 +5330,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& (num_sign_bit_copies (op0, mode)
== GET_MODE_BITSIZE (mode)))
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return simplify_gen_unary (NEG, mode,
gen_lowpart (mode, op0),
mode);
@@ -5343,7 +5342,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& mode == GET_MODE (op0)
&& nonzero_bits (op0, mode) == 1)
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return simplify_gen_binary (XOR, mode,
gen_lowpart (mode, op0),
const1_rtx);
@@ -5356,7 +5355,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& (num_sign_bit_copies (op0, mode)
== GET_MODE_BITSIZE (mode)))
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return plus_constant (gen_lowpart (mode, op0), 1);
}
@@ -5368,7 +5367,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& (num_sign_bit_copies (op0, mode)
== GET_MODE_BITSIZE (mode)))
return gen_lowpart (mode,
- expand_compound_operation (op0));
+ targetm.expand_compound_operation (op0));
else if (STORE_FLAG_VALUE == -1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
@@ -5376,7 +5375,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& mode == GET_MODE (op0)
&& nonzero_bits (op0, mode) == 1)
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return simplify_gen_unary (NEG, mode,
gen_lowpart (mode, op0),
mode);
@@ -5389,7 +5388,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& (num_sign_bit_copies (op0, mode)
== GET_MODE_BITSIZE (mode)))
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return simplify_gen_unary (NOT, mode,
gen_lowpart (mode, op0),
mode);
@@ -5402,7 +5401,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& mode == GET_MODE (op0)
&& nonzero_bits (op0, mode) == 1)
{
- op0 = expand_compound_operation (op0);
+ op0 = targetm.expand_compound_operation (op0);
return plus_constant (gen_lowpart (mode, op0), -1);
}
@@ -5420,7 +5419,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& (i = exact_log2 (nonzero_bits (op0, mode))) >= 0)
{
x = simplify_shift_const (NULL_RTX, ASHIFT, mode,
- expand_compound_operation (op0),
+ targetm.expand_compound_operation (op0),
GET_MODE_BITSIZE (mode) - 1 - i);
if (GET_CODE (x) == AND && XEXP (x, 1) == const_true_rtx)
return XEXP (x, 0);
@@ -5450,7 +5449,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
if (in_dest)
return x;
- return expand_compound_operation (x);
+ return targetm.expand_compound_operation (x);
case SET:
return simplify_set (x);
@@ -6248,7 +6247,7 @@ simplify_logical (rtx x)
consisting of shifts and ANDs into the equivalent compound expression.
It is the inverse of this function, loosely speaking. */
-static rtx
+rtx
expand_compound_operation (rtx x)
{
unsigned HOST_WIDE_INT pos = 0, len;
@@ -6347,7 +6346,7 @@ expand_compound_operation (rtx x)
== 0)))
{
rtx temp = gen_rtx_ZERO_EXTEND (GET_MODE (x), XEXP (x, 0));
- rtx temp2 = expand_compound_operation (temp);
+ rtx temp2 = targetm.expand_compound_operation (temp);
/* Make sure this is a profitable operation. */
if (rtx_cost (x, SET, optimize_this_for_speed_p)
@@ -7609,7 +7608,7 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
case ZERO_EXTEND:
case ZERO_EXTRACT:
case SIGN_EXTRACT:
- x = expand_compound_operation (x);
+ x = targetm.expand_compound_operation (x);
if (GET_CODE (x) != code)
return force_to_mode (x, mode, mask, next_select);
break;
@@ -8564,8 +8563,8 @@ make_field_assignment (rtx x)
if (GET_CODE (src) != IOR && GET_CODE (src) != XOR)
return x;
- rhs = expand_compound_operation (XEXP (src, 0));
- lhs = expand_compound_operation (XEXP (src, 1));
+ rhs = targetm.expand_compound_operation (XEXP (src, 0));
+ lhs = targetm.expand_compound_operation (XEXP (src, 1));
if (GET_CODE (rhs) == AND
&& CONST_INT_P (XEXP (rhs, 1))
@@ -8650,8 +8649,8 @@ apply_distributive_law (rtx x)
if (OBJECT_P (lhs) || OBJECT_P (rhs))
return x;
- lhs = expand_compound_operation (lhs);
- rhs = expand_compound_operation (rhs);
+ lhs = targetm.expand_compound_operation (lhs);
+ rhs = targetm.expand_compound_operation (rhs);
inner_code = GET_CODE (lhs);
if (inner_code != GET_CODE (rhs))
return x;
@@ -9434,7 +9433,7 @@ simplify_shift_const_1 (enum rtx_code code, enum machine_mode result_mode,
case ZERO_EXTEND:
case SIGN_EXTRACT:
case ZERO_EXTRACT:
- new_rtx = expand_compound_operation (varop);
+ new_rtx = targetm.expand_compound_operation (varop);
if (new_rtx != varop)
{
varop = new_rtx;
@@ -10716,7 +10715,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
/* ... fall through ... */
case SIGN_EXTRACT:
- tem = expand_compound_operation (op0);
+ tem = targetm.expand_compound_operation (op0);
if (tem != op0)
{
op0 = tem;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8957fe2..181fa06 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30673,6 +30673,33 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
return 0;
}
+
+/* Optimize conversion of ZERO_EXTRACT, SIGN_EXTRACT, ZERO_EXTEND and
+ SIGN_EXTEND for combiner. */
+
+static rtx
+ix86_expand_compound_operation (rtx x)
+{
+ /* Don't convert:
+
+ (zero_extract:SI (reg:M N) (const_int 8) (const_int 8))
+
+ since we have special patterns to access upper 8bit registers. */
+
+ if (GET_CODE (x) == ZERO_EXTRACT
+ && GET_MODE (x) == SImode
+ && GET_CODE (XEXP (x, 0)) != CLOBBER
+ && GET_MODE (XEXP (x, 0)) != VOIDmode
+ && SCALAR_INT_MODE_P (GET_MODE (XEXP (x, 0)))
+ && CONST_INT_P (XEXP (x, 1))
+ && CONST_INT_P (XEXP (x, 2))
+ && INTVAL (XEXP (x, 1)) == 8
+ && INTVAL (XEXP (x, 2)) == 8)
+ return x;
+
+ return expand_compound_operation (x);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -30943,6 +30970,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end
+#undef TARGET_EXPAND_COMPOUND_OPERATION
+#define TARGET_EXPAND_COMPOUND_OPERATION ix86_expand_compound_operation
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 87329e0..f92b68a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -1474,6 +1474,11 @@ This hook allows the backend to perform additional instantiations on rtl
that are not actually in any insns yet, but will be later.
@end deftypefn
+@deftypefn {Target Hook} rtx TARGET_EXPAND_COMPOUND_OPERATION (rtx)
+This hook allows the backend to optimize conversion of ZERO_EXTRACT,
+SIGN_EXTRACT, ZERO_EXTEND and SIGN_EXTEND for combiner.
+@end deftypefn
+
@deftypefn {Target Hook} {const char *} TARGET_MANGLE_TYPE (const_tree @var{type})
If your target defines any fundamental types, or any types your target
uses should be mangled differently from the default, define this hook
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6be88d1..053e3de 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2233,6 +2233,7 @@ extern bool validate_subreg (enum machine_mode, enum machine_mode,
const_rtx, unsigned int);
/* In combine.c */
+extern rtx expand_compound_operation (rtx);
extern unsigned int extended_count (const_rtx, enum machine_mode, int);
extern rtx remove_death (unsigned int, rtx);
extern void dump_combine_stats (FILE *);
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 1aaf38c..0c0b4fc 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -488,6 +488,9 @@
/* In tree-ssa-math-opts.c */
#define TARGET_BUILTIN_RECIPROCAL default_builtin_reciprocal
+/* In combine.c. */
+#define TARGET_EXPAND_COMPOUND_OPERATION expand_compound_operation
+
/* In varasm.c. */
#ifndef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
@@ -1070,6 +1073,7 @@
TARGET_SECONDARY_RELOAD, \
TARGET_EXPAND_TO_RTL_HOOK, \
TARGET_INSTANTIATE_DECLS, \
+ TARGET_EXPAND_COMPOUND_OPERATION, \
TARGET_HARD_REGNO_SCRATCH_OK, \
TARGET_CASE_VALUES_THRESHOLD, \
TARGET_FRAME_POINTER_REQUIRED, \
diff --git a/gcc/target.h b/gcc/target.h
index 2f181eb..0ff890a 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -1108,6 +1108,10 @@ struct gcc_target
but will be later. */
void (* instantiate_decls) (void);
+ /* Used by combiner to convert ZERO_EXTRACT, SIGN_EXTRACT, ZERO_EXTEND
+ and SIGN_EXTEND into basic operations. */
+ rtx (* expand_compound_operation) (rtx);
+
/* Return true if is OK to use a hard register REGNO as scratch register
in peephole2. */
bool (* hard_regno_scratch_ok) (unsigned int regno);
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
2010-06-25 23:29 RFC: Add TARGET_EXPAND_COMPOUND_OPERATION H.J. Lu
@ 2010-06-28 10:44 ` Eric Botcazou
2010-06-28 15:59 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: Eric Botcazou @ 2010-06-28 10:44 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 924 bytes --]
> However, combiner never exposes this to x86 backend. I added
> a TARGET_EXPAND_COMPOUND_OPERATION hook to allow x86 backend to
> optimize it. For
>
> ---
> typedef struct
> {
> unsigned char c1;
> unsigned char c2;
> unsigned char c3;
> unsigned char c4;
> } foo_t;
>
> int
> foo (foo_t x)
> {
> return x.c2 > 4;
> }
I think that disabling the canonicalization done by expand_compound_operation
can disable certain forms of combining that are beneficial to x86 even for
this kind of pattern.
The combiner already knows that it needs to re-create the compound forms when
it is trying to simplify a comparison, see simplify_comparison. The problem
with your testcase is that make_compound_operation fails to do so.
Lightly tested patch attached.
* combine.c (make_compound_operation) <SUBREG>: Do not return the
result of force_to_mode if it partially re-expanded the compound.
--
Eric Botcazou
[-- Attachment #2: p.diff --]
[-- Type: text/x-diff, Size: 2228 bytes --]
Index: combine.c
===================================================================
--- combine.c (revision 161470)
+++ combine.c (working copy)
@@ -7277,22 +7277,21 @@ make_compound_operation (rtx x, enum rtx
/* Call ourselves recursively on the inner expression. If we are
narrowing the object and it has a different RTL code from
what it originally did, do this SUBREG as a force_to_mode. */
-
- tem = make_compound_operation (SUBREG_REG (x), in_code);
-
{
- rtx simplified = simplify_subreg (mode, tem, GET_MODE (SUBREG_REG (x)),
- SUBREG_BYTE (x));
+ rtx inner = SUBREG_REG (x), simplified;
+
+ tem = make_compound_operation (inner, in_code);
+ simplified
+ = simplify_subreg (mode, tem, GET_MODE (inner), SUBREG_BYTE (x));
if (simplified)
tem = simplified;
- if (GET_CODE (tem) != GET_CODE (SUBREG_REG (x))
- && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))
+ if (GET_CODE (tem) != GET_CODE (inner)
+ && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (inner))
&& subreg_lowpart_p (x))
{
- rtx newer = force_to_mode (tem, mode, ~(HOST_WIDE_INT) 0,
- 0);
+ rtx newer = force_to_mode (tem, mode, ~(HOST_WIDE_INT) 0, 0);
/* If we have something other than a SUBREG, we might have
done an expansion, so rerun ourselves. */
@@ -7300,9 +7299,16 @@ make_compound_operation (rtx x, enum rtx
newer = make_compound_operation (newer, in_code);
/* force_to_mode can expand compounds. If it just re-expanded the
- compound use gen_lowpart instead to convert to the desired
- mode. */
- if (rtx_equal_p (newer, x))
+ compound, use gen_lowpart to convert to the desired mode. */
+ if (rtx_equal_p (newer, x)
+ /* Likewise if it re-expanded the compound only partially.
+ This happens for SUBREG of ZERO_EXTRACT if they extract
+ the same number of bits. */
+ || (GET_CODE (newer) == SUBREG
+ && (GET_CODE (SUBREG_REG (newer)) == LSHIFTRT
+ || GET_CODE (SUBREG_REG (newer)) == ASHIFTRT)
+ && GET_CODE (inner) == AND
+ && rtx_equal_p (SUBREG_REG (newer), XEXP (inner, 0))))
return gen_lowpart (GET_MODE (x), tem);
return newer;
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
2010-06-28 10:44 ` Eric Botcazou
@ 2010-06-28 15:59 ` H.J. Lu
2010-06-28 18:39 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2010-06-28 15:59 UTC (permalink / raw)
To: Eric Botcazou; +Cc: gcc-patches
On Mon, Jun 28, 2010 at 3:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> However, combiner never exposes this to x86 backend. I added
>> a TARGET_EXPAND_COMPOUND_OPERATION hook to allow x86 backend to
>> optimize it. For
>>
>> ---
>> typedef struct
>> {
>> unsigned char c1;
>> unsigned char c2;
>> unsigned char c3;
>> unsigned char c4;
>> } foo_t;
>>
>> int
>> foo (foo_t x)
>> {
>> return x.c2 > 4;
>> }
>
> I think that disabling the canonicalization done by expand_compound_operation
> can disable certain forms of combining that are beneficial to x86 even for
> this kind of patterns.
>
> The combiner already knows that it needs to re-create the compound forms when
> it is trying to simplify a comparison, see simplify_comparison. The problem
> with your testcase is that make_compound_operation fails to do so.
>
> Lightly tested patch attached.
>
>
> * combine.c (make_compound_operation) <SUBREG>: Do not return the
> result of force_to_mode if it partially re-expanded the compound.
>
It works on my testcases. I will do a full bootstrap and test on
Linux/x86-64.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
2010-06-28 15:59 ` H.J. Lu
@ 2010-06-28 18:39 ` H.J. Lu
2010-06-28 18:57 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2010-06-28 18:39 UTC (permalink / raw)
To: Eric Botcazou; +Cc: gcc-patches
On Mon, Jun 28, 2010 at 7:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Jun 28, 2010 at 3:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>>> However, combiner never exposes this to x86 backend. I added
>>> a TARGET_EXPAND_COMPOUND_OPERATION hook to allow x86 backend to
>>> optimize it. For
>>>
>>> ---
>>> typedef struct
>>> {
>>> unsigned char c1;
>>> unsigned char c2;
>>> unsigned char c3;
>>> unsigned char c4;
>>> } foo_t;
>>>
>>> int
>>> foo (foo_t x)
>>> {
>>> return x.c2 > 4;
>>> }
>>
>> I think that disabling the canonicalization done by expand_compound_operation
>> can disable certain forms of combining that are beneficial to x86 even for
>> this kind of patterns.
>>
>> The combiner already knows that it needs to re-create the compound forms when
>> it is trying to simplify a comparison, see simplify_comparison. The problem
>> with your testcase is that make_compound_operation fails to do so.
>>
>> Lightly tested patch attached.
>>
>>
>> * combine.c (make_compound_operation) <SUBREG>: Do not return the
>> result of force_to_mode if it partially re-expanded the compound.
>>
>
> It works on my testcases. I will do a full bootstrap and test on
> Linux/x86-64.
>
There are no regressions. Can you install it?
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
2010-06-28 18:39 ` H.J. Lu
@ 2010-06-28 18:57 ` H.J. Lu
2010-06-29 12:05 ` Eric Botcazou
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2010-06-28 18:57 UTC (permalink / raw)
To: Eric Botcazou; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2083 bytes --]
On Mon, Jun 28, 2010 at 10:21 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Jun 28, 2010 at 7:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Jun 28, 2010 at 3:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>>>> However, combiner never exposes this to x86 backend. I added
>>>> a TARGET_EXPAND_COMPOUND_OPERATION hook to allow x86 backend to
>>>> optimize it. For
>>>>
>>>> ---
>>>> typedef struct
>>>> {
>>>> unsigned char c1;
>>>> unsigned char c2;
>>>> unsigned char c3;
>>>> unsigned char c4;
>>>> } foo_t;
>>>>
>>>> int
>>>> foo (foo_t x)
>>>> {
>>>> return x.c2 > 4;
>>>> }
>>>
>>> I think that disabling the canonicalization done by expand_compound_operation
>>> can disable certain forms of combining that are beneficial to x86 even for
>>> this kind of patterns.
>>>
>>> The combiner already knows that it needs to re-create the compound forms when
>>> it is trying to simplify a comparison, see simplify_comparison. The problem
>>> with your testcase is that make_compound_operation fails to do so.
>>>
>>> Lightly tested patch attached.
>>>
>>>
>>> * combine.c (make_compound_operation) <SUBREG>: Do not return the
>>> result of force_to_mode if it partially re-expanded the compound.
>>>
>>
>> It works on my testcases. I will do a full bootstrap and test on
>> Linux/x86-64.
>>
>
> There are no regressions. Can you install it?
>
Here is the complete patch with testcases.
Thanks.
--
H.J.
---
gcc/
2010-06-28 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/44659
* combine.c (make_compound_operation) <SUBREG>: Do not return the
result of force_to_mode if it partially re-expanded the compound.
gcc/testsuite/
2010-06-28 H.J. Lu <hongjiu.lu@intel.com>
PR rtl-optimization/44659
* gcc.target/i386/extract-1.c: New.
* gcc.target/i386/extract-2.c: Likewise.
* gcc.target/i386/extract-3.c: Likewise.
* gcc.target/i386/extract-4.c: Likewise.
* gcc.target/i386/extract-5.c: Likewise.
* gcc.target/i386/extract-6.c: Likewise.
[-- Attachment #2: gcc-pr44659-2.patch --]
[-- Type: text/plain, Size: 6278 bytes --]
gcc/
2010-06-28 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/44659
* combine.c (make_compound_operation) <SUBREG>: Do not return the
result of force_to_mode if it partially re-expanded the compound.
gcc/testsuite/
2010-06-28 H.J. Lu <hongjiu.lu@intel.com>
PR rtl-optimization/44659
* gcc.target/i386/extract-1.c: New.
* gcc.target/i386/extract-2.c: Likewise.
* gcc.target/i386/extract-3.c: Likewise.
* gcc.target/i386/extract-4.c: Likewise.
* gcc.target/i386/extract-5.c: Likewise.
* gcc.target/i386/extract-6.c: Likewise.
diff --git a/gcc/combine.c b/gcc/combine.c
index 1bee2c7..d3305cb 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7277,22 +7277,21 @@ make_compound_operation (rtx x, enum rtx_code in_code)
/* Call ourselves recursively on the inner expression. If we are
narrowing the object and it has a different RTL code from
what it originally did, do this SUBREG as a force_to_mode. */
-
- tem = make_compound_operation (SUBREG_REG (x), in_code);
-
{
- rtx simplified = simplify_subreg (mode, tem, GET_MODE (SUBREG_REG (x)),
- SUBREG_BYTE (x));
+ rtx inner = SUBREG_REG (x), simplified;
+
+ tem = make_compound_operation (inner, in_code);
+ simplified
+ = simplify_subreg (mode, tem, GET_MODE (inner), SUBREG_BYTE (x));
if (simplified)
tem = simplified;
- if (GET_CODE (tem) != GET_CODE (SUBREG_REG (x))
- && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))
+ if (GET_CODE (tem) != GET_CODE (inner)
+ && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (inner))
&& subreg_lowpart_p (x))
{
- rtx newer = force_to_mode (tem, mode, ~(HOST_WIDE_INT) 0,
- 0);
+ rtx newer = force_to_mode (tem, mode, ~(HOST_WIDE_INT) 0, 0);
/* If we have something other than a SUBREG, we might have
done an expansion, so rerun ourselves. */
@@ -7300,9 +7299,16 @@ make_compound_operation (rtx x, enum rtx_code in_code)
newer = make_compound_operation (newer, in_code);
/* force_to_mode can expand compounds. If it just re-expanded the
- compound use gen_lowpart instead to convert to the desired
- mode. */
- if (rtx_equal_p (newer, x))
+ compound, use gen_lowpart to convert to the desired mode. */
+ if (rtx_equal_p (newer, x)
+ /* Likewise if it re-expanded the compound only partially.
+ This happens for SUBREG of ZERO_EXTRACT if they extract
+ the same number of bits. */
+ || (GET_CODE (newer) == SUBREG
+ && (GET_CODE (SUBREG_REG (newer)) == LSHIFTRT
+ || GET_CODE (SUBREG_REG (newer)) == ASHIFTRT)
+ && GET_CODE (inner) == AND
+ && rtx_equal_p (SUBREG_REG (newer), XEXP (inner, 0))))
return gen_lowpart (GET_MODE (x), tem);
return newer;
diff --git a/gcc/testsuite/gcc.target/i386/extract-1.c b/gcc/testsuite/gcc.target/i386/extract-1.c
new file mode 100644
index 0000000..102beb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+int
+foo (unsigned char x, unsigned char y)
+{
+ return (x % y) != 0;
+}
+
+/* { dg-final { scan-assembler-not "test\[b\]?\[^\\n\]*%\[a-d\]l" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extract-2.c b/gcc/testsuite/gcc.target/i386/extract-2.c
new file mode 100644
index 0000000..3bb5f15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+int
+foo (unsigned char x, unsigned char y)
+{
+ return (x % y) > 4;
+}
+
+/* { dg-final { scan-assembler-times "cmp\[b\]?\[^\\n\]*%\[a-d\]h" 1 } } */
+/* { dg-final { scan-assembler-not "cmp\[b\]?\[^\\n\]*%\[a-d\]l" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extract-3.c b/gcc/testsuite/gcc.target/i386/extract-3.c
new file mode 100644
index 0000000..520bf3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+typedef struct
+{
+ unsigned char c1;
+ unsigned char c2;
+ unsigned char c3;
+ unsigned char c4;
+} foo_t;
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (foo_t x)
+{
+ return x.c2 != 0;
+}
+
+/* { dg-final { scan-assembler-not "test\[b\]?\[^\\n\]*%\[a-z0-9\]+l" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extract-4.c b/gcc/testsuite/gcc.target/i386/extract-4.c
new file mode 100644
index 0000000..716ae22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+typedef struct
+{
+ unsigned char c1;
+ unsigned char c2;
+ unsigned char c3;
+ unsigned char c4;
+} foo_t;
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (foo_t x)
+{
+ return x.c2 > 4;
+}
+
+/* { dg-final { scan-assembler-times "cmp\[b\]?\[^\\n\]*%\[a-z0-9\]+h" 1 } } */
+/* { dg-final { scan-assembler-not "cmp\[b\]?\[^\\n\]*%\[a-z0-9\]+l" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extract-5.c b/gcc/testsuite/gcc.target/i386/extract-5.c
new file mode 100644
index 0000000..a488daf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-5.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+typedef struct
+{
+ unsigned int c1:8;
+ unsigned int c2:8;
+ unsigned int c3:8;
+ unsigned int c4:8;
+} foo_t;
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (foo_t x)
+{
+ return x.c2 != 0;
+}
+
+/* { dg-final { scan-assembler-not "test\[b\]?\[^\\n\]*%\[a-z0-9\]+l" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extract-6.c b/gcc/testsuite/gcc.target/i386/extract-6.c
new file mode 100644
index 0000000..1440ec3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+
+typedef struct
+{
+ unsigned int c1:8;
+ unsigned int c2:8;
+ unsigned int c3:8;
+ unsigned int c4:8;
+
+} foo_t;
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (foo_t x)
+{
+ return x.c2 > 4;
+}
+
+/* { dg-final { scan-assembler-times "cmp\[b\]?\[^\\n\]*%\[a-z0-9\]+h" 1 } } */
+/* { dg-final { scan-assembler-not "cmp\[b\]?\[^\\n\]*%\[a-z0-9\]+l" } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: RFC: Add TARGET_EXPAND_COMPOUND_OPERATION
2010-06-28 18:57 ` H.J. Lu
@ 2010-06-29 12:05 ` Eric Botcazou
0 siblings, 0 replies; 6+ messages in thread
From: Eric Botcazou @ 2010-06-29 12:05 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
> Here is the complete patch with testcases.
Thanks for the testing, now installed.
--
Eric Botcazou
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2010-06-29 9:26 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-25 23:29 RFC: Add TARGET_EXPAND_COMPOUND_OPERATION H.J. Lu
2010-06-28 10:44 ` Eric Botcazou
2010-06-28 15:59 ` H.J. Lu
2010-06-28 18:39 ` H.J. Lu
2010-06-28 18:57 ` H.J. Lu
2010-06-29 12:05 ` Eric Botcazou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).