public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Fix Thumb-2 NEON ICE
@ 2009-10-09  0:19 Joseph S. Myers
  2009-10-16 16:39 ` Ping " Joseph S. Myers
  2009-10-20 11:11 ` Mikael Pettersson
  0 siblings, 2 replies; 7+ messages in thread
From: Joseph S. Myers @ 2009-10-09  0:19 UTC (permalink / raw)
  To: gcc-patches

This patch fixes an ICE for Thumb-2 NEON on the included testcase.

output_move_neon has operands[0]

  (mem:V16QI (plus:SI (reg:SI 3 r3) (const_int -256)))

and operands[1]

  (reg:V16QI 95 d16)

and calls adjust_address (mem, SImode, 8 * i).  This ends up
validating the address for SImode, but the offset of -256 is not valid
for SImode for Thumb-2.  The new address is only used with vldr/vstr
instructions; there seems to be no need for it to be valid for SImode,
so this patch makes it use DImode instead, for which all valid NEON
offsets are accepted.

Tested with no regressions with cross to arm-none-eabi.  OK to commit?

2009-10-08  Joseph Myers  <joseph@codesourcery.com>

	* config/arm/arm.c (output_move_neon): Use DImode in call to
	adjust_address.

testsuite:
2009-10-08  Joseph Myers  <joseph@codesourcery.com>

	* gcc.target/arm/neon-thumb2-move.c: New test.

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 152576)
+++ gcc/config/arm/arm.c	(working copy)
@@ -12269,7 +12269,7 @@ output_move_neon (rtx *operands)
 	  {
 	    /* We're only using DImode here because it's a convenient size.  */
 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-	    ops[1] = adjust_address (mem, SImode, 8 * i);
+	    ops[1] = adjust_address (mem, DImode, 8 * i);
 	    if (reg_overlap_mentioned_p (ops[0], mem))
 	      {
 		gcc_assert (overlap == -1);
Index: gcc/testsuite/gcc.target/arm/neon-thumb2-move.c
===================================================================
--- gcc/testsuite/gcc.target/arm/neon-thumb2-move.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/neon-thumb2-move.c	(revision 0)
@@ -0,0 +1,98 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */
+
+#include <arm_neon.h>
+#include <stddef.h>
+
+void *
+memset (DST, C, LENGTH)
+     void *DST;
+     int C;
+     size_t LENGTH;
+{
+  void* DST0 = DST;
+  unsigned char C_BYTE = C;
+
+
+  if (__builtin_expect(LENGTH < 4, 1)) {
+    size_t i = 0;
+    while (i < LENGTH) {
+      ((char*)DST)[i] = C_BYTE;
+      i++;
+    }
+    return DST;
+  }
+
+  const char* DST_end = (char*)DST + LENGTH;
+
+
+  while ((uintptr_t)DST % 4 != 0) {
+    *(char*) (DST++) = C_BYTE;
+  }
+
+
+  uint32_t C_SHORTWORD = (uint32_t)(unsigned char)(C_BYTE) * 0x01010101;
+
+
+  if (__builtin_expect(DST_end - (char*)DST >= 16, 0)) {
+    while ((uintptr_t)DST % 16 != 0) {
+      *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
+      DST += 4;
+    }
+
+
+    uint8x16_t C_WORD = vdupq_n_u8(C_BYTE);
+
+
+
+
+
+    size_t i = 0;
+    LENGTH = DST_end - (char*)DST;
+    while (i + 16 * 16 <= LENGTH) {
+      *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 4))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 5))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 6))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 7))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 8))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 9))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 10))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 11))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 12))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 13))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 14))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 15))) = C_WORD;
+      i += 16 * 16;
+    }
+    while (i + 16 * 4 <= LENGTH) {
+      *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
+      *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
+      i += 16 * 4;
+    }
+    while (i + 16 <= LENGTH) {
+      *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
+      i += 16;
+    }
+    DST += i;
+  }
+
+  while (4 <= DST_end - (char*)DST) {
+    *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
+    DST += 4;
+  }
+
+
+  while ((char*)DST < DST_end) {
+    *((char*)DST) = C_BYTE;
+    DST++;
+  }
+
+  return DST0;
+}

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Ping Re: Fix Thumb-2 NEON ICE
  2009-10-09  0:19 Fix Thumb-2 NEON ICE Joseph S. Myers
@ 2009-10-16 16:39 ` Joseph S. Myers
       [not found]   ` <1255964118.19735.66.camel@e200601-lin.cambridge.arm.com>
  2009-10-20 11:11 ` Mikael Pettersson
  1 sibling, 1 reply; 7+ messages in thread
From: Joseph S. Myers @ 2009-10-16 16:39 UTC (permalink / raw)
  To: gcc-patches

Ping.  This patch 
<http://gcc.gnu.org/ml/gcc-patches/2009-10/msg00566.html> is pending 
review.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Ping Re: Fix Thumb-2 NEON ICE
       [not found]   ` <1255964118.19735.66.camel@e200601-lin.cambridge.arm.com>
@ 2009-10-19 15:10     ` Joseph S. Myers
  2009-10-19 15:24       ` Richard Earnshaw
  0 siblings, 1 reply; 7+ messages in thread
From: Joseph S. Myers @ 2009-10-19 15:10 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

On Mon, 19 Oct 2009, Richard Earnshaw wrote:

> On Fri, 2009-10-16 at 15:55 +0000, Joseph S. Myers wrote:
> > Ping.  This patch 
> > <http://gcc.gnu.org/ml/gcc-patches/2009-10/msg00566.html> is pending 
> > review.
> > 
> 
> This is probably OK, but why aren't you just using the natural mode for
> the mem in adjust_address (ie V16QImode)?

Since the REG generated is DImode, and this is splitting into 8-byte 
moves, an 8-byte mode such as DImode (matching that of the REG) seems 
natural to me.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Ping Re: Fix Thumb-2 NEON ICE
  2009-10-19 15:10     ` Joseph S. Myers
@ 2009-10-19 15:24       ` Richard Earnshaw
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Earnshaw @ 2009-10-19 15:24 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: gcc-patches


On Mon, 2009-10-19 at 15:09 +0000, Joseph S. Myers wrote:
> On Mon, 19 Oct 2009, Richard Earnshaw wrote:
> 
> > On Fri, 2009-10-16 at 15:55 +0000, Joseph S. Myers wrote:
> > > Ping.  This patch 
> > > <http://gcc.gnu.org/ml/gcc-patches/2009-10/msg00566.html> is pending 
> > > review.
> > > 
> > 
> > This is probably OK, but why aren't you just using the natural mode for
> > the mem in adjust_address (ie V16QImode)?
> 
> Since the REG generated is DImode, and this is splitting into 8-byte 
> moves, an 8-byte mode such as DImode (matching that of the REG) seems 
> natural to me.


OK.

R.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Fix Thumb-2 NEON ICE
  2009-10-09  0:19 Fix Thumb-2 NEON ICE Joseph S. Myers
  2009-10-16 16:39 ` Ping " Joseph S. Myers
@ 2009-10-20 11:11 ` Mikael Pettersson
  2009-10-20 14:48   ` Joseph S. Myers
  1 sibling, 1 reply; 7+ messages in thread
From: Mikael Pettersson @ 2009-10-20 11:11 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: gcc-patches

Joseph S. Myers writes:
 > This patch fixes an ICE for Thumb-2 NEON on the included testcase.
 > 
 > output_move_neon has operands[0]
 > 
 >   (mem:V16QI (plus:SI (reg:SI 3 r3) (const_int -256)))
 > 
 > and operands[1]
 > 
 >   (reg:V16QI 95 d16)
 > 
 > and calls adjust_address (mem, SImode, 8 * i).  This ends up
 > validating the address for SImode, but the offset of -256 is not valid
 > for SImode for Thumb-2.  The new address is only used with vldr/vstr
 > instructions; there seems to be no need for it to be valid for SImode,
 > so this patch makes it use DImode instead, for which all valid NEON
 > offsets are accepted.
 > 
 > Tested with no regressions with cross to arm-none-eabi.  OK to commit?
 > 
 > 2009-10-08  Joseph Myers  <joseph@codesourcery.com>
 > 
 > 	* config/arm/arm.c (output_move_neon): Use DImode in call to
 > 	adjust_address.
 > 
 > testsuite:
 > 2009-10-08  Joseph Myers  <joseph@codesourcery.com>
 > 
 > 	* gcc.target/arm/neon-thumb2-move.c: New test.

This test case also ICEs gcc-4.4.2, but not gcc-4.3.4.
Will you apply the patch to the 4.4 branch too?

I checked that the patch applies to 4.4.2 and fixes the ICE,
but I haven't tested it beyond that.

/Mikael

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Fix Thumb-2 NEON ICE
  2009-10-20 11:11 ` Mikael Pettersson
@ 2009-10-20 14:48   ` Joseph S. Myers
  2009-10-20 16:22     ` Mikael Pettersson
  0 siblings, 1 reply; 7+ messages in thread
From: Joseph S. Myers @ 2009-10-20 14:48 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: gcc-patches

On Tue, 20 Oct 2009, Mikael Pettersson wrote:

> This test case also ICEs gcc-4.4.2, but not gcc-4.3.4.
> Will you apply the patch to the 4.4 branch too?

I have now applied this to the 4.4 branch.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Fix Thumb-2 NEON ICE
  2009-10-20 14:48   ` Joseph S. Myers
@ 2009-10-20 16:22     ` Mikael Pettersson
  0 siblings, 0 replies; 7+ messages in thread
From: Mikael Pettersson @ 2009-10-20 16:22 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: Mikael Pettersson, gcc-patches

Joseph S. Myers writes:
 > On Tue, 20 Oct 2009, Mikael Pettersson wrote:
 > 
 > > This test case also ICEs gcc-4.4.2, but not gcc-4.3.4.
 > > Will you apply the patch to the 4.4 branch too?
 > 
 > I have now applied this to the 4.4 branch.

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-10-20 16:18 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-09  0:19 Fix Thumb-2 NEON ICE Joseph S. Myers
2009-10-16 16:39 ` Ping " Joseph S. Myers
     [not found]   ` <1255964118.19735.66.camel@e200601-lin.cambridge.arm.com>
2009-10-19 15:10     ` Joseph S. Myers
2009-10-19 15:24       ` Richard Earnshaw
2009-10-20 11:11 ` Mikael Pettersson
2009-10-20 14:48   ` Joseph S. Myers
2009-10-20 16:22     ` Mikael Pettersson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).