* patch to fix PR60969
@ 2014-05-16 17:49 Vladimir Makarov
2014-05-19 21:37 ` James Greenhalgh
0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Makarov @ 2014-05-16 17:49 UTC (permalink / raw)
To: GCC Patches
[-- Attachment #1: Type: text/plain, Size: 536 bytes --]
The following patch fixes
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
The patch was bootstrapped and tested on x86/x86-64.
Committed as rev. 210519 to gcc 4.9 branch and as rev. 210520 to trunk.
2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/60969
* ira-costs.c (record_reg_classes): Allow only memory for pseudo.
Calculate costs for this case.
2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/60969
* g++.dg/pr60969.C: New.
[-- Attachment #2: pr60969.patch --]
[-- Type: text/plain, Size: 6173 bytes --]
Index: ira-costs.c
===================================================================
--- ira-costs.c (revision 210069)
+++ ira-costs.c (working copy)
@@ -762,10 +762,11 @@ record_reg_classes (int n_alts, int n_op
into that class. */
if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
{
- if (classes[i] == NO_REGS)
+ if (classes[i] == NO_REGS && ! allows_mem[i])
{
/* We must always fail if the operand is a REG, but
- we did not find a suitable class.
+ we did not find a suitable class and memory is
+ not allowed.
Otherwise we may perform an uninitialized read
from this_op_costs after the `continue' statement
@@ -783,50 +784,90 @@ record_reg_classes (int n_alts, int n_op
bool out_p = recog_data.operand_type[i] != OP_IN;
enum reg_class op_class = classes[i];
move_table *move_in_cost, *move_out_cost;
+ short (*mem_cost)[2];
ira_init_register_move_cost_if_necessary (mode);
if (! in_p)
{
ira_assert (out_p);
- move_out_cost = ira_may_move_out_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ if (op_class == NO_REGS)
{
- rclass = cost_classes[k];
- pp_costs[k]
- = move_out_cost[op_class][rclass] * frequency;
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = mem_cost[rclass][0] * frequency;
+ }
+ }
+ else
+ {
+ move_out_cost = ira_may_move_out_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k]
+ = move_out_cost[op_class][rclass] * frequency;
+ }
}
}
else if (! out_p)
{
ira_assert (in_p);
- move_in_cost = ira_may_move_in_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ if (op_class == NO_REGS)
{
- rclass = cost_classes[k];
- pp_costs[k]
- = move_in_cost[rclass][op_class] * frequency;
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = mem_cost[rclass][1] * frequency;
+ }
+ }
+ else
+ {
+ move_in_cost = ira_may_move_in_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k]
+ = move_in_cost[rclass][op_class] * frequency;
+ }
}
}
else
{
- move_in_cost = ira_may_move_in_cost[mode];
- move_out_cost = ira_may_move_out_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
- {
- rclass = cost_classes[k];
- pp_costs[k] = ((move_in_cost[rclass][op_class]
- + move_out_cost[op_class][rclass])
- * frequency);
+ if (op_class == NO_REGS)
+ {
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = ((mem_cost[rclass][0]
+ + mem_cost[rclass][1])
+ * frequency);
+ }
+ }
+ else
+ {
+ move_in_cost = ira_may_move_in_cost[mode];
+ move_out_cost = ira_may_move_out_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = ((move_in_cost[rclass][op_class]
+ + move_out_cost[op_class][rclass])
+ * frequency);
+ }
}
}
/* If the alternative actually allows memory, make
things a bit cheaper since we won't need an extra
insn to load it. */
- pp->mem_cost
- = ((out_p ? ira_memory_move_cost[mode][op_class][0] : 0)
- + (in_p ? ira_memory_move_cost[mode][op_class][1] : 0)
- - allows_mem[i]) * frequency;
+ if (op_class != NO_REGS)
+ pp->mem_cost
+ = ((out_p ? ira_memory_move_cost[mode][op_class][0] : 0)
+ + (in_p ? ira_memory_move_cost[mode][op_class][1] : 0)
+ - allows_mem[i]) * frequency;
/* If we have assigned a class to this allocno in
our first pass, add a cost to this alternative
corresponding to what we would add if this
@@ -836,15 +877,28 @@ record_reg_classes (int n_alts, int n_op
enum reg_class pref_class = pref[COST_INDEX (REGNO (op))];
if (pref_class == NO_REGS)
+ {
+ if (op_class != NO_REGS)
+ alt_cost
+ += ((out_p
+ ? ira_memory_move_cost[mode][op_class][0]
+ : 0)
+ + (in_p
+ ? ira_memory_move_cost[mode][op_class][1]
+ : 0));
+ }
+ else if (op_class == NO_REGS)
alt_cost
+= ((out_p
- ? ira_memory_move_cost[mode][op_class][0] : 0)
+ ? ira_memory_move_cost[mode][pref_class][1]
+ : 0)
+ (in_p
- ? ira_memory_move_cost[mode][op_class][1]
+ ? ira_memory_move_cost[mode][pref_class][0]
: 0));
else if (ira_reg_class_intersect[pref_class][op_class]
== NO_REGS)
- alt_cost += ira_register_move_cost[mode][pref_class][op_class];
+ alt_cost += (ira_register_move_cost
+ [mode][pref_class][op_class]);
}
}
}
Index: testsuite/g++.dg/pr60969.C
===================================================================
--- testsuite/g++.dg/pr60969.C (revision 0)
+++ testsuite/g++.dg/pr60969.C (working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -march=pentium4" } */
+
+struct A
+{
+ float f, g, h, k;
+ A () {}
+ A (float v0, float x, float y) : f(v0), g(x), h(y), k(0.0f) {}
+ A bar (A &a, float t) { return A (f + a.f * t, g + a.g * t, h + a.h * t); }
+};
+
+A
+baz (A &x, A &y, float t)
+{
+ return x.bar (y, t);
+}
+
+A *
+foo (A &s, A &t, A &u, A &v, int y, int z)
+{
+ A *x = new A[y * z];
+ for (int i = 0; i < 7; i++)
+ {
+ A s = baz (s, u, i / (float) z);
+ A t = baz (t, v, i / (float) z);
+ for (int j = 0; j < 7; j++)
+ x[i * y + j] = baz (s, t, j / (float) y);
+ }
+ return x;
+}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: patch to fix PR60969
2014-05-16 17:49 patch to fix PR60969 Vladimir Makarov
@ 2014-05-19 21:37 ` James Greenhalgh
2014-05-20 0:25 ` H.J. Lu
2014-05-20 14:37 ` Vladimir Makarov
0 siblings, 2 replies; 6+ messages in thread
From: James Greenhalgh @ 2014-05-19 21:37 UTC (permalink / raw)
To: Vladimir Makarov; +Cc: GCC Patches
On Fri, May 16, 2014 at 06:49:45PM +0100, Vladimir Makarov wrote:
> The following patch fixes
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
>
> The patch was bootstrapped and tested on x86/x86-64.
>
> Committed as rev. 210519 to gcc 4.9 branch and as rev. 210520 to trunk.
>
> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>
> PR rtl-optimization/60969
> * ira-costs.c (record_reg_classes): Allow only memory for pseudo.
> Calculate costs for this case.
>
> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>
> PR rtl-optimization/60969
> * g++.dg/pr60969.C: New.
This seems to have caused gcc.target/aarch64/vect-abs-compile.c to begin
failing on aarch64-none-elf:
FAIL: gcc.target/aarch64/table-intrinsics.c (internal compiler error)
FAIL: gcc.target/aarch64/table-intrinsics.c (test for excess errors)
Excess errors:
/work/gcc-clean/src/gcc/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c:172:1: internal compiler error: Max. number of generated reload insns per insn is achieved (90)
0x8923cd lra_constraints(bool)
/work/gcc-clean/src/gcc/gcc/lra-constraints.c:4140
0x882f62 lra(_IO_FILE*)
/work/gcc-clean/src/gcc/gcc/lra.c:2353
0x8453f6 do_reload
/work/gcc-clean/src/gcc/gcc/ira.c:5457
0x8453f6 execute
/work/gcc-clean/src/gcc/gcc/ira.c:5618
Thanks,
James
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: patch to fix PR60969
2014-05-19 21:37 ` James Greenhalgh
@ 2014-05-20 0:25 ` H.J. Lu
2014-05-20 14:37 ` Vladimir Makarov
1 sibling, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2014-05-20 0:25 UTC (permalink / raw)
To: James Greenhalgh; +Cc: Vladimir Makarov, GCC Patches
On Mon, May 19, 2014 at 2:37 PM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Fri, May 16, 2014 at 06:49:45PM +0100, Vladimir Makarov wrote:
>> The following patch fixes
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
>>
>> The patch was bootstrapped and tested on x86/x86-64.
>>
>> Committed as rev. 210519 to gcc 4.9 branch and as rev. 210520 to trunk.
>>
>> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>>
>> PR rtl-optimization/60969
>> * ira-costs.c (record_reg_classes): Allow only memory for pseudo.
>> Calculate costs for this case.
>>
>> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>>
>> PR rtl-optimization/60969
>> * g++.dg/pr60969.C: New.
>
> This seems to have caused gcc.target/aarch64/vect-abs-compile.c to begin
> failing on aarch64-none-elf:
>
> FAIL: gcc.target/aarch64/table-intrinsics.c (internal compiler error)
> FAIL: gcc.target/aarch64/table-intrinsics.c (test for excess errors)
> Excess errors:
> /work/gcc-clean/src/gcc/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c:172:1: internal compiler error: Max. number of generated reload insns per insn is achieved (90)
> 0x8923cd lra_constraints(bool)
> /work/gcc-clean/src/gcc/gcc/lra-constraints.c:4140
> 0x882f62 lra(_IO_FILE*)
> /work/gcc-clean/src/gcc/gcc/lra.c:2353
> 0x8453f6 do_reload
> /work/gcc-clean/src/gcc/gcc/ira.c:5457
> 0x8453f6 execute
> /work/gcc-clean/src/gcc/gcc/ira.c:5618
>
I think the x86 backend should disable 3DNOW mode if
3DNOW isn't enabled. Allowing SFmode with MMX
doesn't buy us anything but trouble.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: patch to fix PR60969
2014-05-19 21:37 ` James Greenhalgh
2014-05-20 0:25 ` H.J. Lu
@ 2014-05-20 14:37 ` Vladimir Makarov
2014-05-22 9:18 ` Ramana Radhakrishnan
1 sibling, 1 reply; 6+ messages in thread
From: Vladimir Makarov @ 2014-05-20 14:37 UTC (permalink / raw)
To: James Greenhalgh; +Cc: GCC Patches
On 05/19/2014 05:37 PM, James Greenhalgh wrote:
> On Fri, May 16, 2014 at 06:49:45PM +0100, Vladimir Makarov wrote:
>> The following patch fixes
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
>>
>> The patch was bootstrapped and tested on x86/x86-64.
>>
>> Committed as rev. 210519 to gcc 4.9 branch and as rev. 210520 to trunk.
>>
>> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>>
>> PR rtl-optimization/60969
>> * ira-costs.c (record_reg_classes): Allow only memory for pseudo.
>> Calculate costs for this case.
>>
>> 2014-05-16 Vladimir Makarov <vmakarov@redhat.com>
>>
>> PR rtl-optimization/60969
>> * g++.dg/pr60969.C: New.
> This seems to have caused gcc.target/aarch64/vect-abs-compile.c to begin
> failing on aarch64-none-elf:
>
> FAIL: gcc.target/aarch64/table-intrinsics.c (internal compiler error)
> FAIL: gcc.target/aarch64/table-intrinsics.c (test for excess errors)
> Excess errors:
> /work/gcc-clean/src/gcc/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c:172:1: internal compiler error: Max. number of generated reload insns per insn is achieved (90)
> 0x8923cd lra_constraints(bool)
> /work/gcc-clean/src/gcc/gcc/lra-constraints.c:4140
> 0x882f62 lra(_IO_FILE*)
> /work/gcc-clean/src/gcc/gcc/lra.c:2353
> 0x8453f6 do_reload
> /work/gcc-clean/src/gcc/gcc/ira.c:5457
> 0x8453f6 execute
> /work/gcc-clean/src/gcc/gcc/ira.c:5618
>
>
Sorry, I have no aarch64 machine. Could you send me the pre-processed
file of the test?
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: patch to fix PR60969
2014-05-20 14:37 ` Vladimir Makarov
@ 2014-05-22 9:18 ` Ramana Radhakrishnan
0 siblings, 0 replies; 6+ messages in thread
From: Ramana Radhakrishnan @ 2014-05-22 9:18 UTC (permalink / raw)
To: Vladimir Makarov; +Cc: James Greenhalgh, GCC Patches
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
>> FAIL: gcc.target/aarch64/table-intrinsics.c (internal compiler error)
>> FAIL: gcc.target/aarch64/table-intrinsics.c (test for excess errors)
>> Excess errors:
>> /work/gcc-clean/src/gcc/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c:172:1: internal compiler error: Max. number of generated reload insns per insn is achieved (90)
>> 0x8923cd lra_constraints(bool)
>> /work/gcc-clean/src/gcc/gcc/lra-constraints.c:4140
>> 0x882f62 lra(_IO_FILE*)
>> /work/gcc-clean/src/gcc/gcc/lra.c:2353
>> 0x8453f6 do_reload
>> /work/gcc-clean/src/gcc/gcc/ira.c:5457
>> 0x8453f6 execute
>> /work/gcc-clean/src/gcc/gcc/ira.c:5618
>>
>>
> Sorry, I have no aarch64 machine. Could you send me the pre-processed
> file of the test?
Please find inline a reduced testcase that fails.
Compiler configured with
$SRC/gcc/configure --target=aarch64-none-elf
$>./xgcc -B`pwd` -S -O2 try.c
try.c: In function 'qtbl_tests8_2':
try.c:26:1: internal compiler error: Max. number of generated reload
insns per insn is achieved (90)
}
^
0x8653f7 lra_constraints(bool)
/work/wa1/src/gcc/gcc/lra-constraints.c:4140
0x855ca6 lra(_IO_FILE*)
/work/wa1/src/gcc/gcc/lra.c:2353
0x81eada do_reload
/work/wa1/src/gcc/gcc/ira.c:5457
0x81eada execute
/work/wa1/src/gcc/gcc/ira.c:5618
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
compilation status=1
$>cat try.c
typedef __builtin_aarch64_simd_qi int8x8_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uqi uint8x8_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_qi int8x16_t
__attribute__ ((__vector_size__ (16)));
typedef struct int8x16x2_t
{
int8x16_t val[2];
} int8x16x2_t;
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
int8x8_t result;
__asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
:"=w"(result)
:"Q"(tab),"w"(idx)
:"memory", "v16", "v17");
return result;
}
int8x8_t
qtbl_tests8_2 (int8x16x2_t tab, uint8x8_t idx)
{
return vqtbl2_s8 (tab, idx);
}
>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: patch to fix PR60969
@ 2014-05-17 11:00 Uros Bizjak
0 siblings, 0 replies; 6+ messages in thread
From: Uros Bizjak @ 2014-05-17 11:00 UTC (permalink / raw)
To: gcc-patches; +Cc: Vladimir Makarov, H.J. Lu
[-- Attachment #1: Type: text/plain, Size: 399 bytes --]
Hello!
The attached patch enhances the testcase to also check for the presence of MMX
registers on all 32-bit x86 targets.
2014-05-17 Uros Bizjak <ubizjak@gmail.com>
* g++.dg/pr60969.C: Compile for all ilp32 x86 targets.
(dg-options): Add -mfpmath=387.
(dg-final): Check that no MMX registers are used.
Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline and 4.9 branch.
Uros.
[-- Attachment #2: t.diff.txt --]
[-- Type: text/plain, Size: 522 bytes --]
Index: pr60969.C
===================================================================
--- pr60969.C (revision 210549)
+++ pr60969.C (working copy)
@@ -1,5 +1,5 @@
-/* { dg-do compile { target i?86-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -march=pentium4" } */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-O2 -ftree-vectorize -march=pentium4 -mfpmath=387" } */
struct A
{
@@ -28,3 +28,5 @@
}
return x;
}
+
+/* { dg-final { scan-assembler-not "%mm" } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-05-22 9:18 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-16 17:49 patch to fix PR60969 Vladimir Makarov
2014-05-19 21:37 ` James Greenhalgh
2014-05-20 0:25 ` H.J. Lu
2014-05-20 14:37 ` Vladimir Makarov
2014-05-22 9:18 ` Ramana Radhakrishnan
2014-05-17 11:00 Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).