* [PATCH, i386]: Optimization of the i386 and x86_64 compilers
@ 2007-03-02 11:01 Uros Bizjak
2007-03-02 12:03 ` Jan Hubicka
2007-03-02 18:38 ` Richard Henderson
0 siblings, 2 replies; 7+ messages in thread
From: Uros Bizjak @ 2007-03-02 11:01 UTC (permalink / raw)
To: GCC Patches
[-- Attachment #1: Type: text/plain, Size: 1777 bytes --]
Hello!
The attached patch implements the idea proposed by Michael Meissner in PR
31019. The core of his idea is to replace (1 << ix86_[tune|arch])
in x86_some_var & (1 << ix86_[tune|arch]) with a precalculated global
variable.
The results of this patch are quite surprising, as the text size of cc1
on an i686 host was lowered considerably:
size cc1
text data bss dec hex filename
7632793 18876 574516 8226185 7d8589 cc1
size cc1-patched
text data bss dec hex filename
6731749 18876 574516 7325141 6fc5d5 cc1-patched
Yes, for 901k.
This effect could be partially attributed to the way i386 handles
variable shifts. Unpatched gcc uses code that also clobbers %ecx:
8554612: 8b 0d 9c a1 81 08 mov 0x881a19c,%ecx
8554618: a1 a4 4b 72 08 mov 0x8724ba4,%eax
855461d: d3 f8 sar %cl,%eax
855461f: a8 01 test $0x1,%al
Patched gcc uses the following code:
8498762: a1 a0 e1 73 08 mov 0x873e1a0,%eax
8498767: 85 05 04 8c 64 08 test %eax,0x8648c04
2007-03-02 Uros Bizjak <ubizjak@gmail.com>
Michael Meissner <michael.meissner@amd.com>
* config/i386/i386.h (TUNEMASK): Redefine to use ix86_tune_mask.
(ARCHMASK): Define.
(TARGET_CMOVE): Use ARCHMASK.
(TARGET_CMPXCHG): Ditto.
(TARGET_CMPXCHG8B): Ditto.
(TARGET_XADD): Ditto.
(TARGET_BSWAP): Ditto.
* config/i386/i386.c (ix86_tune_mask): New global variable.
(ix86_arch_mask): Ditto.
(override_options): Initialize ix86_tune_mask and
ix86_arch_mask. Use ARCHMASK to clear MASK_NO_FANCY_MATH_387 in
target_flags.
The patch was bootstrapped on i686-pc-linux-gnu, regression tested for
c, c++ and gfortran.
BTW: The patch also rearranges the processor #defines into a more logical sequence.
Uros.
[-- Attachment #2: i386-mask.diff --]
[-- Type: application/octet-stream, Size: 4876 bytes --]
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 122469)
+++ config/i386/i386.h (working copy)
@@ -179,7 +179,6 @@
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
-#define TUNEMASK (1 << ix86_tune)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
extern const int x86_branch_hints, x86_unroll_strlen;
@@ -208,6 +207,9 @@
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse, x86_cmpxchg16b;
+#define TUNEMASK ix86_tune_mask
+#define ARCHMASK ix86_arch_mask
+
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & TUNEMASK)
@@ -215,7 +217,7 @@
#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & TUNEMASK)
/* For sane SSE instruction set generation we need fcomi instruction. It is
safe to enable all CMOVE instructions. */
-#define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE)
+#define TARGET_CMOVE ((x86_cmove & ARCHMASK) || TARGET_SSE)
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK)
#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK)
@@ -276,11 +278,11 @@
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
-#define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
-#define TARGET_CMPXCHG8B (x86_cmpxchg8b & (1 << ix86_arch))
+#define TARGET_CMPXCHG (x86_cmpxchg & ARCHMASK)
+#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ARCHMASK)
#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
-#define TARGET_XADD (x86_xadd & (1 << ix86_arch))
-#define TARGET_BSWAP (x86_bswap & (1 << ix86_arch))
+#define TARGET_XADD (x86_xadd & ARCHMASK)
+#define TARGET_BSWAP (x86_bswap & ARCHMASK)
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
@@ -2130,7 +2132,10 @@
};
extern enum processor_type ix86_tune;
+extern int ix86_tune_mask;
+
extern enum processor_type ix86_arch;
+extern int ix86_arch_mask;
enum fpmath_unit
{
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 122469)
+++ config/i386/i386.c (working copy)
@@ -984,23 +984,25 @@
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_CORE2 (1<<PROCESSOR_CORE2)
+
#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
-#define m_K6 (1<<PROCESSOR_K6)
+#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
-#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
-#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_NOCONA (1<<PROCESSOR_NOCONA)
-#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
+
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
-#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
-#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
/* Generic instruction choice should be common subset of supported CPUs
(PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
@@ -1395,8 +1397,11 @@
/* Which cpu are we scheduling for. */
enum processor_type ix86_tune;
+int ix86_tune_mask;
+
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
+int ix86_arch_mask;
/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
@@ -2074,8 +2079,10 @@
if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
{
ix86_arch = processor_alias_table[i].processor;
+ ix86_arch_mask = 1 << ix86_arch;
/* Default cpu tuning to the architecture. */
ix86_tune = ix86_arch;
+ ix86_tune_mask = 1 << ix86_tune;
if (processor_alias_table[i].flags & PTA_MMX
&& !(target_flags_explicit & MASK_MMX))
target_flags |= MASK_MMX;
@@ -2276,7 +2283,7 @@
/* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
since the insns won't need emulation. */
- if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
+ if (x86_arch_always_fancy_math_387 & ARCHMASK)
target_flags &= ~MASK_NO_FANCY_MATH_387;
/* Likewise, if the target doesn't have a 387, or we've specified
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 11:01 [PATCH, i386]: Optimization of the i386 and x86_64 compilers Uros Bizjak
@ 2007-03-02 12:03 ` Jan Hubicka
2007-03-02 14:42 ` Uros Bizjak
2007-03-02 17:55 ` Meissner, Michael
2007-03-02 18:38 ` Richard Henderson
1 sibling, 2 replies; 7+ messages in thread
From: Jan Hubicka @ 2007-03-02 12:03 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches
> Hello!
>
> Attached patch implements the idea, proposed by Michael Meissner in PR
> 31019. The core of his idea is to substitute (1 << ix86_[tune|arch])
> in x86_some_var & (1 << ix86_[tune|arch])) by a precalculated global
> variable.
>
> The results of this patch are quite suprising as the text size of cc1
> on i686 host was lowered considerably:
>
> size cc1
> text data bss dec hex filename
> 7632793 18876 574516 8226185 7d8589 cc1
>
> size cc1-patched
> text data bss dec hex filename
> 6731749 18876 574516 7325141 6fc5d5 cc1-patched
>
> Yes, for 901k.
Neat :) It also implies that our insn-attrtab/insn-recog is a truly ugly
piece of autogenerated code, but this is a very nice win.
I wonder if we are able to fold something like
(((1 << var) & 0x2) || ((1 << var) & 0x4)) into (1<<var) & 0x6
as we can do for var & 0x2 || var & 0x4, since the former is what a lot of
the code looks like.
I was once able to reduce insn-attrtab size considerably via pure
attributes on the predicate functions. This wasn't considered safe, as
people might want to add something non-pure into the functions, but
perhaps now we can consider pushing some of the very trivial predicates into
inlined headers to allow more compiler optimizations to trigger...
>
>
> 2007-03-02 Uros Bizjak <ubizjak@gmail.com>
> Michael Meissner <michael.meissner@amd.com>
>
> * config/i386/i386.h (TUNEMASK): Redefine to use ix86_tune_mask.
> (ARCHMASK): Define.
> (TARGET_CMOVE): Use ARCHMASK.
> (TARGET_CMPXCHG): Ditto.
> (TARGET_CMPXCHG8B): Ditto.
> (TARGET_XADD): Ditto.
> (TARGET_BSWAP): Ditto.
> * config/i386/i386.c (ix86_tune_mask): New global variable.
> (ix86_arch_mask): Ditto.
> (override_options): Initialize ix86_tune_mask and
> ix86_arch_mask. Use ARCHMASK to clear MASK_NO_FANCY_MATH_387 in
> target_flags.
>
> The patch was bootstrapped on i686-pc-linux-gnu, regression tested for
> c, c++ and gfortran.
You didn't mention whether you are waiting for approval or just going to
commit it, so just in case: this is OK :)
Honza
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 12:03 ` Jan Hubicka
@ 2007-03-02 14:42 ` Uros Bizjak
2007-03-02 17:55 ` Meissner, Michael
1 sibling, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2007-03-02 14:42 UTC (permalink / raw)
To: Jan Hubicka; +Cc: GCC Patches
On 3/2/07, Jan Hubicka <hubicka@ucw.cz> wrote:
> > The results of this patch are quite suprising as the text size of cc1
> > on i686 host was lowered considerably:
> Neat :) It also imply that our insn-attrtab/insn-recog is truly ugly
> piece of autogenerated code, but this is very nice win.
Well, according to Murphy - if something looks too good to be true ...
Unfortunately, there was a thinko in my patch, and the newly introduced
masks were not updated for the default case. After the fix, the gains are
less breathtaking: the text size of cc1 is lowered by ~8k. These gains
are mainly in insn-attrtab (4.7k) and insn-recog (1.2k), plus a couple of
hundred bytes saved here and there.
> > The patch was bootstrapped on i686-pc-linux-gnu, regression tested for
> > c, c++ and gfortran.
>
> You didn't mentioned if you are waiting for approval or just going to
> commit it, so just in case this is OK :)
I was looking into where all those big differences were coming from.
Finally, I have committed a fixed version of the patch, tested on
i686-pc-linux-gnu.
2007-03-02 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (override_options): Put initialization of
ix86_tune_mask and ix86_arch_mask to the correct place.
Uros.
Index: i386.c
===================================================================
--- i386.c (revision 122473)
+++ i386.c (working copy)
@@ -2079,10 +2079,8 @@
if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
{
ix86_arch = processor_alias_table[i].processor;
- ix86_arch_mask = 1 << ix86_arch;
/* Default cpu tuning to the architecture. */
ix86_tune = ix86_arch;
- ix86_tune_mask = 1 << ix86_tune;
if (processor_alias_table[i].flags & PTA_MMX
&& !(target_flags_explicit & MASK_MMX))
target_flags |= MASK_MMX;
@@ -2157,6 +2155,9 @@
if (i == pta_size)
error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+ ix86_arch_mask = 1 << ix86_arch;
+ ix86_tune_mask = 1 << ix86_tune;
+
if (optimize_size)
ix86_cost = &size_cost;
else
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 12:03 ` Jan Hubicka
2007-03-02 14:42 ` Uros Bizjak
@ 2007-03-02 17:55 ` Meissner, Michael
2007-03-03 0:46 ` Kaveh R. GHAZI
1 sibling, 1 reply; 7+ messages in thread
From: Meissner, Michael @ 2007-03-02 17:55 UTC (permalink / raw)
To: Jan Hubicka, Uros Bizjak; +Cc: GCC Patches
> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org
[mailto:gcc-patches-owner@gcc.gnu.org]
> On Behalf Of Jan Hubicka
> Sent: Friday, March 02, 2007 7:03 AM
> To: Uros Bizjak
> Cc: GCC Patches
> Subject: Re: [PATCH, i386]: Optimization of the i386 and x86_64
compilers
>
> > Hello!
> >
> > Attached patch implements the idea, proposed by Michael Meissner in
PR
> > 31019. The core of his idea is to substitute (1 << ix86_[tune|arch])
> > in x86_some_var & (1 << ix86_[tune|arch])) by a precalculated global
> > variable.
> >
> > The results of this patch are quite suprising as the text size of
cc1
> > on i686 host was lowered considerably:
> >
> > size cc1
> > text data bss dec hex filename
> > 7632793 18876 574516 8226185 7d8589 cc1
> >
> > size cc1-patched
> > text data bss dec hex filename
> > 6731749 18876 574516 7325141 6fc5d5 cc1-patched
> >
> > Yes, for 901k.
>
> Neat :) It also imply that our insn-attrtab/insn-recog is truly ugly
> piece of autogenerated code, but this is very nice win.
>
> I wonder if we are able to fold something like
> (((1 << var) & 0x2) || ((1 << var) & 0x4)) into (1<<var) & 0x6
> as we can do for var & 0x2 || var & 0x4 since the former is how a lot
of
> code looks like.
One win I've thought about is to optimize multiple comparisons against
small constants, ie:
if (a == 5 || a == 7 || a == 11) { ... }
Into:
if (((unsigned)a) < 32
    && ((1 << a) & ((1 << 5) | (1 << 7) | (1 << 11))))
Then you would also want to handle switch statements where you have a
few cases that are small constants by using this as well.
The trouble is that you normally can't optimize away the test that the
value is in 0..31, but perhaps the gen* functions can do that.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 11:01 [PATCH, i386]: Optimization of the i386 and x86_64 compilers Uros Bizjak
2007-03-02 12:03 ` Jan Hubicka
@ 2007-03-02 18:38 ` Richard Henderson
2007-03-02 22:32 ` Uros Bizjak
1 sibling, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2007-03-02 18:38 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches
On Fri, Mar 02, 2007 at 12:01:49PM +0100, Uros Bizjak wrote:
> +#define TUNEMASK ix86_tune_mask
> +#define ARCHMASK ix86_arch_mask
Please remove these defines and use the variables directly.
r~
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 18:38 ` Richard Henderson
@ 2007-03-02 22:32 ` Uros Bizjak
0 siblings, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2007-03-02 22:32 UTC (permalink / raw)
To: Richard Henderson, GCC Patches
[-- Attachment #1: Type: text/plain, Size: 678 bytes --]
Richard Henderson wrote:
> On Fri, Mar 02, 2007 at 12:01:49PM +0100, Uros Bizjak wrote:
>
>> +#define TUNEMASK ix86_tune_mask
>> +#define ARCHMASK ix86_arch_mask
>>
>
> Please remove these defines and use the variables directly.
>
I have committed attached patch. Tested on x86_64-pc-linux-gnu.
2007-03-02 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.h (TUNEMASK): Remove define.
(ARCHMASK): Remove define.
(TARGET_*): Use ix86_tune_mask variable instead of TUNEMASK.
Use ix86_arch_mask variable instead of ARCHMASK.
* config/i386/i386.c (override_options): Ditto.
(standard_80387_constant_p): Ditto.
Uros.
[-- Attachment #2: i386-nodef.diff --]
[-- Type: text/x-patch, Size: 9110 bytes --]
Index: i386.h
===================================================================
--- i386.h (revision 122478)
+++ i386.h (working copy)
@@ -207,65 +207,65 @@
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse, x86_cmpxchg16b;
-#define TUNEMASK ix86_tune_mask
-#define ARCHMASK ix86_arch_mask
-
-#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
-#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
-#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & TUNEMASK)
-#define TARGET_USE_BIT_TEST (x86_use_bit_test & TUNEMASK)
-#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & TUNEMASK)
+#define TARGET_USE_LEAVE (x86_use_leave & ix86_tune_mask)
+#define TARGET_PUSH_MEMORY (x86_push_memory & ix86_tune_mask)
+#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & ix86_tune_mask)
+#define TARGET_USE_BIT_TEST (x86_use_bit_test & ix86_tune_mask)
+#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & ix86_tune_mask)
/* For sane SSE instruction set generation we need fcomi instruction. It is
safe to enable all CMOVE instructions. */
-#define TARGET_CMOVE ((x86_cmove & ARCHMASK) || TARGET_SSE)
+#define TARGET_CMOVE ((x86_cmove & ix86_arch_mask) || TARGET_SSE)
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
-#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK)
-#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK)
-#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & TUNEMASK)
-#define TARGET_USE_SAHF ((x86_use_sahf & TUNEMASK) && !TARGET_64BIT)
-#define TARGET_MOVX (x86_movx & TUNEMASK)
-#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & TUNEMASK)
-#define TARGET_PARTIAL_FLAG_REG_STALL (x86_partial_flag_reg_stall & TUNEMASK)
-#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & TUNEMASK)
-#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & TUNEMASK)
-#define TARGET_USE_MOV0 (x86_use_mov0 & TUNEMASK)
-#define TARGET_USE_CLTD (x86_use_cltd & TUNEMASK)
-#define TARGET_USE_XCHGB (x86_use_xchgb & TUNEMASK)
-#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & TUNEMASK)
-#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & TUNEMASK)
-#define TARGET_READ_MODIFY (x86_read_modify & TUNEMASK)
-#define TARGET_PROMOTE_QImode (x86_promote_QImode & TUNEMASK)
-#define TARGET_FAST_PREFIX (x86_fast_prefix & TUNEMASK)
-#define TARGET_SINGLE_STRINGOP (x86_single_stringop & TUNEMASK)
-#define TARGET_QIMODE_MATH (x86_qimode_math & TUNEMASK)
-#define TARGET_HIMODE_MATH (x86_himode_math & TUNEMASK)
-#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & TUNEMASK)
-#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & TUNEMASK)
-#define TARGET_ADD_ESP_4 (x86_add_esp_4 & TUNEMASK)
-#define TARGET_ADD_ESP_8 (x86_add_esp_8 & TUNEMASK)
-#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & TUNEMASK)
-#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & TUNEMASK)
-#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & TUNEMASK)
-#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
-#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
- (x86_sse_partial_reg_dependency & TUNEMASK)
-#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
- (x86_sse_unaligned_move_optimal & TUNEMASK)
-#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
-#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
-#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
-#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK)
-#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
-#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
+#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & ix86_tune_mask)
+#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & ix86_tune_mask)
+#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & ix86_tune_mask)
+#define TARGET_USE_SAHF ((x86_use_sahf & ix86_tune_mask) && !TARGET_64BIT)
+#define TARGET_MOVX (x86_movx & ix86_tune_mask)
+#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & ix86_tune_mask)
+#define TARGET_PARTIAL_FLAG_REG_STALL \
+ (x86_partial_flag_reg_stall & ix86_tune_mask)
+#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & ix86_tune_mask)
+#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & ix86_tune_mask)
+#define TARGET_USE_MOV0 (x86_use_mov0 & ix86_tune_mask)
+#define TARGET_USE_CLTD (x86_use_cltd & ix86_tune_mask)
+#define TARGET_USE_XCHGB (x86_use_xchgb & ix86_tune_mask)
+#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & ix86_tune_mask)
+#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & ix86_tune_mask)
+#define TARGET_READ_MODIFY (x86_read_modify & ix86_tune_mask)
+#define TARGET_PROMOTE_QImode (x86_promote_QImode & ix86_tune_mask)
+#define TARGET_FAST_PREFIX (x86_fast_prefix & ix86_tune_mask)
+#define TARGET_SINGLE_STRINGOP (x86_single_stringop & ix86_tune_mask)
+#define TARGET_QIMODE_MATH (x86_qimode_math & ix86_tune_mask)
+#define TARGET_HIMODE_MATH (x86_himode_math & ix86_tune_mask)
+#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & ix86_tune_mask)
+#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & ix86_tune_mask)
+#define TARGET_ADD_ESP_4 (x86_add_esp_4 & ix86_tune_mask)
+#define TARGET_ADD_ESP_8 (x86_add_esp_8 & ix86_tune_mask)
+#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & ix86_tune_mask)
+#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & ix86_tune_mask)
+#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & ix86_tune_mask)
+#define TARGET_PARTIAL_REG_DEPENDENCY \
+ (x86_partial_reg_dependency & ix86_tune_mask)
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ (x86_sse_partial_reg_dependency & ix86_tune_mask)
+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
+ (x86_sse_unaligned_move_optimal & ix86_tune_mask)
+#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & ix86_tune_mask)
+#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & ix86_tune_mask)
+#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & ix86_tune_mask)
+#define TARGET_MEMORY_MISMATCH_STALL \
+ (x86_memory_mismatch_stall & ix86_tune_mask)
+#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & ix86_tune_mask)
+#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & ix86_tune_mask)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
-#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
-#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
-#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
-#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
-#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
-#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
-#define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK)
-#define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK)
+#define TARGET_SHIFT1 (x86_shift1 & ix86_tune_mask)
+#define TARGET_USE_FFREEP (x86_use_ffreep & ix86_tune_mask)
+#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & ix86_tune_mask)
+#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & ix86_tune_mask)
+#define TARGET_SCHEDULE (x86_schedule & ix86_tune_mask)
+#define TARGET_USE_BT (x86_use_bt & ix86_tune_mask)
+#define TARGET_USE_INCDEC (x86_use_incdec & ix86_tune_mask)
+#define TARGET_PAD_RETURNS (x86_pad_returns & ix86_tune_mask)
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
@@ -278,11 +278,11 @@
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
-#define TARGET_CMPXCHG (x86_cmpxchg & ARCHMASK)
-#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ARCHMASK)
+#define TARGET_CMPXCHG (x86_cmpxchg & ix86_arch_mask)
+#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ix86_arch_mask)
#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
-#define TARGET_XADD (x86_xadd & ARCHMASK)
-#define TARGET_BSWAP (x86_bswap & ARCHMASK)
+#define TARGET_XADD (x86_xadd & ix86_arch_mask)
+#define TARGET_BSWAP (x86_bswap & ix86_arch_mask)
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
Index: i386.c
===================================================================
--- i386.c (revision 122478)
+++ i386.c (working copy)
@@ -2284,7 +2284,7 @@
/* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
since the insns won't need emulation. */
- if (x86_arch_always_fancy_math_387 & ARCHMASK)
+ if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
target_flags &= ~MASK_NO_FANCY_MATH_387;
/* Likewise, if the target doesn't have a 387, or we've specified
@@ -2405,7 +2405,7 @@
if (!TARGET_80387)
target_flags &= ~MASK_FLOAT_RETURNS;
- if ((x86_accumulate_outgoing_args & TUNEMASK)
+ if ((x86_accumulate_outgoing_args & ix86_tune_mask)
&& !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
@@ -4999,7 +4999,7 @@
/* For XFmode constants, try to find a special 80387 instruction when
optimizing for size or on those CPUs that benefit from them. */
if (GET_MODE (x) == XFmode
- && (optimize_size || x86_ext_80387_constants & TUNEMASK))
+ && (optimize_size || x86_ext_80387_constants & ix86_tune_mask))
{
int i;
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH, i386]: Optimization of the i386 and x86_64 compilers
2007-03-02 17:55 ` Meissner, Michael
@ 2007-03-03 0:46 ` Kaveh R. GHAZI
0 siblings, 0 replies; 7+ messages in thread
From: Kaveh R. GHAZI @ 2007-03-03 0:46 UTC (permalink / raw)
To: Meissner, Michael; +Cc: Jan Hubicka, Uros Bizjak, GCC Patches
On Fri, 2 Mar 2007, Meissner, Michael wrote:
> One win I've thought about is to optimize multiple comparisons against
> small constants, ie:
>
> If (a == 5 || a == 7 || a == 11) { ... }
>
> Into:
>
> if (((unsigned)a) < 32 && (1 << a) & ((1 << 5) | (1 << 7) | (1
> << 11))
>
> Then you would also want to handle switch statements where you have a
> few cases that are small constants to using this also.
Switch statements were already done by Roger (four years ago):
http://gcc.gnu.org/ml/gcc-patches/2003-01/msg01950.html
But AFAICT, if statements are not yet so optimized. I think that would be
a welcome addition.
Thanks,
--Kaveh
--
Kaveh R. Ghazi ghazi@caip.rutgers.edu
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2007-03-03 0:46 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-02 11:01 [PATCH, i386]: Optimization of the i386 and x86_64 compilers Uros Bizjak
2007-03-02 12:03 ` Jan Hubicka
2007-03-02 14:42 ` Uros Bizjak
2007-03-02 17:55 ` Meissner, Michael
2007-03-03 0:46 ` Kaveh R. GHAZI
2007-03-02 18:38 ` Richard Henderson
2007-03-02 22:32 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).