public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
@ 2015-11-06 10:46 Kyrill Tkachov
  2015-11-09  9:27 ` Kyrill Tkachov
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Kyrill Tkachov @ 2015-11-06 10:46 UTC (permalink / raw)
  To: GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1381 bytes --]

Hi all,

In this wrong-code PR the vector setmem expansion, and arm_block_set_aligned_vect in particular,
uses the wrong offset when calling adjust_automodify_address. In the attached testcase, during the
initial zeroing out we get two V16QI stores, but both are recorded by adjust_automodify_address
as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28 bytes).

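Because the second store was recorded as writing at x+0, it appeared not to overlap x.g (at offset 24).
This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.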

This patch fixes the problem by keeping track of the offset to which stores are emitted and
passing it to adjust_automodify_address as appropriate.

From inspection, I see that arm_block_set_unaligned_vect also has this issue, so I performed the same
fix in that function as well.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

This bug appears on GCC 5 too and I'm currently testing this patch there.
Ok to backport to GCC 5 as well?

Thanks,
Kyrill

2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR target/68143
     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
     offset from dstbase and use it appropriately in
     adjust_automodify_address.
     (arm_block_set_aligned_vect): Likewise.

2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR target/68143
     * gcc.target/arm/pr68143_1.c: New test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-setmem-offset.patch --]
[-- Type: text/x-patch; name=arm-setmem-offset.patch, Size: 6040 bytes --]

commit 78c6989a7af1df672ea227057180d79d717ed5f3
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Oct 28 17:29:18 2015 +0000

    [ARM] Properly update memory offsets when expanding setmem

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 66e8afc..adf3143 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
   rtx (*gen_func) (rtx, rtx);
   machine_mode mode;
   unsigned HOST_WIDE_INT v = value;
-
+  unsigned int offset = 0;
   gcc_assert ((align & 0x3) != 0);
   nelt_v8 = GET_MODE_NUNITS (V8QImode);
   nelt_v16 = GET_MODE_NUNITS (V16QImode);
@@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
     return false;
 
   dst = copy_addr_to_reg (XEXP (dstbase, 0));
-  mem = adjust_automodify_address (dstbase, mode, dst, 0);
+  mem = adjust_automodify_address (dstbase, mode, dst, offset);
 
   v = sext_hwi (v, BITS_PER_WORD);
   val_elt = GEN_INT (v);
@@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
     {
       emit_insn ((*gen_func) (mem, reg));
       if (i + 2 * nelt_mode <= length)
-	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
+	{
+	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
+	  offset += nelt_mode;
+	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
+	}
     }
 
   /* If there are not less than nelt_v8 bytes leftover, we must be in
@@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
   if (i + nelt_v8 < length)
     {
       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
+      offset += length - i;
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
+
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) != 0 && align >= 2)
 	set_mem_align (mem, BITS_PER_UNIT);
@@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
   else if (i < length && i + nelt_v8 >= length)
     {
       if (mode == V16QImode)
-	{
-	  reg = gen_lowpart (V8QImode, reg);
-	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
-	}
+	reg = gen_lowpart (V8QImode, reg);
+
       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
 					      + (nelt_mode - nelt_v8))));
+      offset += (length - i) + (nelt_mode - nelt_v8);
+      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
+
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) != 0 && align >= 2)
 	set_mem_align (mem, BITS_PER_UNIT);
@@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase,
   rtx rval[MAX_VECT_LEN];
   machine_mode mode;
   unsigned HOST_WIDE_INT v = value;
+  unsigned int offset = 0;
 
   gcc_assert ((align & 0x3) == 0);
   nelt_v8 = GET_MODE_NUNITS (V8QImode);
@@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase,
   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
   if (mode == V16QImode)
     {
-      mem = adjust_automodify_address (dstbase, mode, dst, 0);
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
       emit_insn (gen_movmisalignv16qi (mem, reg));
       i += nelt_mode;
       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
       if (i + nelt_v8 < length && i + nelt_v16 > length)
 	{
 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
-	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
+	  offset += length - nelt_mode;
+	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
 	  /* We are shifting bytes back, set the alignment accordingly.  */
 	  if ((length & 0x3) == 0)
 	    set_mem_align (mem, BITS_PER_UNIT * 4);
@@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase,
   for (; (i + nelt_mode <= length); i += nelt_mode)
     {
       addr = plus_constant (Pmode, dst, i);
-      mem = adjust_automodify_address (dstbase, mode, addr, i);
+      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
       emit_move_insn (mem, reg);
     }
 
@@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase,
   if (i + UNITS_PER_WORD == length)
     {
       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
-      mem = adjust_automodify_address (dstbase, mode,
-				       addr, i - UNITS_PER_WORD);
+      offset += i - UNITS_PER_WORD;
+      mem = adjust_automodify_address (dstbase, mode, addr, offset);
       /* We are shifting 4 bytes back, set the alignment accordingly.  */
       if (align > UNITS_PER_WORD)
 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
@@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase,
   else if (i < length)
     {
       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
-      mem = adjust_automodify_address (dstbase, mode, dst, 0);
+      offset += length - nelt_mode;
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) == 0)
 	set_mem_align (mem, BITS_PER_UNIT * 2);
diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c b/gcc/testsuite/gcc.target/arm/pr68143_1.c
new file mode 100644
index 0000000..323473f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O3 -mcpu=cortex-a57" } */
+/* { dg-add-options arm_neon } */
+
+#define NULL 0
+
+struct stuff
+{
+    int a;
+    int b;
+    int c;
+    int d;
+    int e;
+    char *f;
+    int g;
+};
+
+void __attribute__ ((noinline))
+bar (struct stuff *x)
+{
+  if (x->g != 2)
+    __builtin_abort ();
+}
+
+int
+main (int argc, char** argv)
+{
+  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
+  x.a = 100;
+  x.d = 100;
+  x.g = 2;
+  /* Struct should now look like {100, 0, 0, 100, 0, 0, 2}.  */
+  bar (&x);
+  return 0;
+}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
  2015-11-06 10:46 [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem Kyrill Tkachov
@ 2015-11-09  9:27 ` Kyrill Tkachov
  2015-11-13 11:52 ` Kyrill Tkachov
  2015-11-17 12:02 ` Ramana Radhakrishnan
  2 siblings, 0 replies; 6+ messages in thread
From: Kyrill Tkachov @ 2015-11-09  9:27 UTC (permalink / raw)
  To: GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw


On 06/11/15 10:46, Kyrill Tkachov wrote:
> Hi all,
>
> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
>
> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
>
> This patch fixes the problem by keeping track of the offset to which stores are emitted and
> passing it to adjust_automodify_address as appropriate.
>
> From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
> fix in that function as well.
>
> Bootstrapped and tested on arm-none-linux-gnueabihf.
>
> Ok for trunk?
>
> This bug appears on GCC 5 too and I'm currently testing this patch there.
> Ok to backport to GCC 5 as well?

Bootstrap and testing for arm-none-linux-gnueabihf on the GCC 5 branch look OK too.

Kyrill

>
> Thanks,
> Kyrill
>
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     PR target/68143
>     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>     offset from dstbase and use it appropriately in
>     adjust_automodify_address.
>     (arm_block_set_aligned_vect): Likewise.
>
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     PR target/68143
>     * gcc.target/arm/pr68143_1.c: New test.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
  2015-11-06 10:46 [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem Kyrill Tkachov
  2015-11-09  9:27 ` Kyrill Tkachov
@ 2015-11-13 11:52 ` Kyrill Tkachov
  2015-11-17 12:02 ` Ramana Radhakrishnan
  2 siblings, 0 replies; 6+ messages in thread
From: Kyrill Tkachov @ 2015-11-13 11:52 UTC (permalink / raw)
  To: GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw

Ping.
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00581.html

Thanks,
Kyrill
On 06/11/15 10:46, Kyrill Tkachov wrote:
> Hi all,
>
> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
>
> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
>
> This patch fixes the problem by keeping track of the offset to which stores are emitted and
> passing it to adjust_automodify_address as appropriate.
>
> From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
> fix in that function as well.
>
> Bootstrapped and tested on arm-none-linux-gnueabihf.
>
> Ok for trunk?
>
> This bug appears on GCC 5 too and I'm currently testing this patch there.
> Ok to backport to GCC 5 as well?
>
> Thanks,
> Kyrill
>
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     PR target/68143
>     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>     offset from dstbase and use it appropriately in
>     adjust_automodify_address.
>     (arm_block_set_aligned_vect): Likewise.
>
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     PR target/68143
>     * gcc.target/arm/pr68143_1.c: New test.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
  2015-11-06 10:46 [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem Kyrill Tkachov
  2015-11-09  9:27 ` Kyrill Tkachov
  2015-11-13 11:52 ` Kyrill Tkachov
@ 2015-11-17 12:02 ` Ramana Radhakrishnan
  2015-11-17 12:58   ` Kyrill Tkachov
  2 siblings, 1 reply; 6+ messages in thread
From: Ramana Radhakrishnan @ 2015-11-17 12:02 UTC (permalink / raw)
  To: Kyrill Tkachov, GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw



On 06/11/15 10:46, Kyrill Tkachov wrote:
> Hi all,
> 
> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
> 
> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
> 
> This patch fixes the problem by keeping track of the offset to which stores are emitted and
> passing it to adjust_automodify_address as appropriate.
> 
> From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
> fix in that function as well.
> 
> Bootstrapped and tested on arm-none-linux-gnueabihf.
> 
> Ok for trunk?
> 
> This bug appears on GCC 5 too and I'm currently testing this patch there.
> Ok to backport to GCC 5 as well?

> 
> Thanks,
> Kyrill
> 
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR target/68143
>     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>     offset from dstbase and use it appropriately in
>     adjust_automodify_address.
>     (arm_block_set_aligned_vect): Likewise.
> 
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR target/68143
>     * gcc.target/arm/pr68143_1.c: New test.

Sorry about the delay in reviewing this. There's nothing ARM-specific about this test, so I'd just put it in gcc.c-torture/execute; there are enough auto-testers with NEON enabled that will show up issues if this starts failing.

Ok with that change.

Ramana

> 
> arm-setmem-offset.patch
> 
> 
> commit 78c6989a7af1df672ea227057180d79d717ed5f3
> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
> Date:   Wed Oct 28 17:29:18 2015 +0000
> 
>     [ARM] Properly update memory offsets when expanding setmem
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 66e8afc..adf3143 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    rtx (*gen_func) (rtx, rtx);
>    machine_mode mode;
>    unsigned HOST_WIDE_INT v = value;
> -
> +  unsigned int offset = 0;
>    gcc_assert ((align & 0x3) != 0);
>    nelt_v8 = GET_MODE_NUNITS (V8QImode);
>    nelt_v16 = GET_MODE_NUNITS (V16QImode);
> @@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>      return false;
>  
>    dst = copy_addr_to_reg (XEXP (dstbase, 0));
> -  mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>  
>    v = sext_hwi (v, BITS_PER_WORD);
>    val_elt = GEN_INT (v);
> @@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
>      {
>        emit_insn ((*gen_func) (mem, reg));
>        if (i + 2 * nelt_mode <= length)
> -	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
> +	{
> +	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
> +	  offset += nelt_mode;
> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
> +	}
>      }
>  
>    /* If there are not less than nelt_v8 bytes leftover, we must be in
> @@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    if (i + nelt_v8 < length)
>      {
>        emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
> +      offset += length - i;
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
> +
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) != 0 && align >= 2)
>  	set_mem_align (mem, BITS_PER_UNIT);
> @@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    else if (i < length && i + nelt_v8 >= length)
>      {
>        if (mode == V16QImode)
> -	{
> -	  reg = gen_lowpart (V8QImode, reg);
> -	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
> -	}
> +	reg = gen_lowpart (V8QImode, reg);
> +
>        emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
>  					      + (nelt_mode - nelt_v8))));
> +      offset += (length - i) + (nelt_mode - nelt_v8);
> +      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
> +
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) != 0 && align >= 2)
>  	set_mem_align (mem, BITS_PER_UNIT);
> @@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>    rtx rval[MAX_VECT_LEN];
>    machine_mode mode;
>    unsigned HOST_WIDE_INT v = value;
> +  unsigned int offset = 0;
>  
>    gcc_assert ((align & 0x3) == 0);
>    nelt_v8 = GET_MODE_NUNITS (V8QImode);
> @@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase,
>    /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
>    if (mode == V16QImode)
>      {
> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>        emit_insn (gen_movmisalignv16qi (mem, reg));
>        i += nelt_mode;
>        /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
>        if (i + nelt_v8 < length && i + nelt_v16 > length)
>  	{
>  	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
> -	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +	  offset += length - nelt_mode;
> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>  	  /* We are shifting bytes back, set the alignment accordingly.  */
>  	  if ((length & 0x3) == 0)
>  	    set_mem_align (mem, BITS_PER_UNIT * 4);
> @@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>    for (; (i + nelt_mode <= length); i += nelt_mode)
>      {
>        addr = plus_constant (Pmode, dst, i);
> -      mem = adjust_automodify_address (dstbase, mode, addr, i);
> +      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
>        emit_move_insn (mem, reg);
>      }
>  
> @@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>    if (i + UNITS_PER_WORD == length)
>      {
>        addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
> -      mem = adjust_automodify_address (dstbase, mode,
> -				       addr, i - UNITS_PER_WORD);
> +      offset += i - UNITS_PER_WORD;
> +      mem = adjust_automodify_address (dstbase, mode, addr, offset);
>        /* We are shifting 4 bytes back, set the alignment accordingly.  */
>        if (align > UNITS_PER_WORD)
>  	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
> @@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>    else if (i < length)
>      {
>        emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +      offset += length - nelt_mode;
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) == 0)
>  	set_mem_align (mem, BITS_PER_UNIT * 2);
> diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c b/gcc/testsuite/gcc.target/arm/pr68143_1.c
> new file mode 100644
> index 0000000..323473f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O3 -mcpu=cortex-a57" } */
> +/* { dg-add-options arm_neon } */
> +
> +#define NULL 0
> +
> +struct stuff
> +{
> +    int a;
> +    int b;
> +    int c;
> +    int d;
> +    int e;
> +    char *f;
> +    int g;
> +};
> +
> +void __attribute__ ((noinline))
> +bar (struct stuff *x)
> +{
> +  if (x->g != 2)
> +    __builtin_abort ();
> +}
> +
> +int
> +main (int argc, char** argv)
> +{
> +  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
> +  x.a = 100;
> +  x.d = 100;
> +  x.g = 2;
> +  /* Struct should now look like {100, 0, 0, 100, 0, 0, 0, 2}.  */
> +  bar (&x);
> +  return 0;
> +}
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
  2015-11-17 12:02 ` Ramana Radhakrishnan
@ 2015-11-17 12:58   ` Kyrill Tkachov
  2015-11-17 13:22     ` Kyrill Tkachov
  0 siblings, 1 reply; 6+ messages in thread
From: Kyrill Tkachov @ 2015-11-17 12:58 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw

Hi Ramana,

On 17/11/15 12:02, Ramana Radhakrishnan wrote:
>
> On 06/11/15 10:46, Kyrill Tkachov wrote:
>> Hi all,
>>
>> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
>> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
>> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
>> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
>>
>> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
>>
>> This patch fixes the problem by keeping track of the offset to which stores are emitted and
>> passing it to adjust_automodify_address as appropriate.
>>
>>  From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
>> fix in that function as well.
>>
>> Bootstrapped and tested on arm-none-linux-gnueabihf.
>>
>> Ok for trunk?
>>
>> This bug appears on GCC 5 too and I'm currently testing this patch there.
>> Ok to backport to GCC 5 as well?
>> Thanks,
>> Kyrill
>>
>> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR target/68143
>>      * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>>      offset from dstbase and use it appropriately in
>>      adjust_automodify_address.
>>      (arm_block_set_aligned_vect): Likewise.
>>
>> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR target/68143
>>      * gcc.target/arm/pr68143_1.c: New test.
> Sorry about the delay in reviewing this. There's nothing arm specific about this test - I'd just put this in gcc.c-torture/execute, there are enough auto-testers with neon on that will show up issues if this starts failing.

Thanks, will do. I was on the fence about whether this should go in torture.
I'll put it there.

Kyrill

>
> Ok with that change.
>
> Ramana
>
>> arm-setmem-offset.patch
>>
>>
>> commit 78c6989a7af1df672ea227057180d79d717ed5f3
>> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
>> Date:   Wed Oct 28 17:29:18 2015 +0000
>>
>>      [ARM] Properly update memory offsets when expanding setmem
>>
>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> index 66e8afc..adf3143 100644
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>     rtx (*gen_func) (rtx, rtx);
>>     machine_mode mode;
>>     unsigned HOST_WIDE_INT v = value;
>> -
>> +  unsigned int offset = 0;
>>     gcc_assert ((align & 0x3) != 0);
>>     nelt_v8 = GET_MODE_NUNITS (V8QImode);
>>     nelt_v16 = GET_MODE_NUNITS (V16QImode);
>> @@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>       return false;
>>   
>>     dst = copy_addr_to_reg (XEXP (dstbase, 0));
>> -  mem = adjust_automodify_address (dstbase, mode, dst, 0);
>> +  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>   
>>     v = sext_hwi (v, BITS_PER_WORD);
>>     val_elt = GEN_INT (v);
>> @@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>       {
>>         emit_insn ((*gen_func) (mem, reg));
>>         if (i + 2 * nelt_mode <= length)
>> -	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
>> +	{
>> +	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
>> +	  offset += nelt_mode;
>> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>> +	}
>>       }
>>   
>>     /* If there are not less than nelt_v8 bytes leftover, we must be in
>> @@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>     if (i + nelt_v8 < length)
>>       {
>>         emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
>> +      offset += length - i;
>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>> +
>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>         if ((length & 1) != 0 && align >= 2)
>>   	set_mem_align (mem, BITS_PER_UNIT);
>> @@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>     else if (i < length && i + nelt_v8 >= length)
>>       {
>>         if (mode == V16QImode)
>> -	{
>> -	  reg = gen_lowpart (V8QImode, reg);
>> -	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
>> -	}
>> +	reg = gen_lowpart (V8QImode, reg);
>> +
>>         emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
>>   					      + (nelt_mode - nelt_v8))));
>> +      offset += (length - i) + (nelt_mode - nelt_v8);
>> +      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
>> +
>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>         if ((length & 1) != 0 && align >= 2)
>>   	set_mem_align (mem, BITS_PER_UNIT);
>> @@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>>     rtx rval[MAX_VECT_LEN];
>>     machine_mode mode;
>>     unsigned HOST_WIDE_INT v = value;
>> +  unsigned int offset = 0;
>>   
>>     gcc_assert ((align & 0x3) == 0);
>>     nelt_v8 = GET_MODE_NUNITS (V8QImode);
>> @@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase,
>>     /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
>>     if (mode == V16QImode)
>>       {
>> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>         emit_insn (gen_movmisalignv16qi (mem, reg));
>>         i += nelt_mode;
>>         /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
>>         if (i + nelt_v8 < length && i + nelt_v16 > length)
>>   	{
>>   	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
>> -	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
>> +	  offset += length - nelt_mode;
>> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>   	  /* We are shifting bytes back, set the alignment accordingly.  */
>>   	  if ((length & 0x3) == 0)
>>   	    set_mem_align (mem, BITS_PER_UNIT * 4);
>> @@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>>     for (; (i + nelt_mode <= length); i += nelt_mode)
>>       {
>>         addr = plus_constant (Pmode, dst, i);
>> -      mem = adjust_automodify_address (dstbase, mode, addr, i);
>> +      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
>>         emit_move_insn (mem, reg);
>>       }
>>   
>> @@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>>     if (i + UNITS_PER_WORD == length)
>>       {
>>         addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
>> -      mem = adjust_automodify_address (dstbase, mode,
>> -				       addr, i - UNITS_PER_WORD);
>> +      offset += i - UNITS_PER_WORD;
>> +      mem = adjust_automodify_address (dstbase, mode, addr, offset);
>>         /* We are shifting 4 bytes back, set the alignment accordingly.  */
>>         if (align > UNITS_PER_WORD)
>>   	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
>> @@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>>     else if (i < length)
>>       {
>>         emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
>> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
>> +      offset += length - nelt_mode;
>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>         if ((length & 1) == 0)
>>   	set_mem_align (mem, BITS_PER_UNIT * 2);
>> diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c b/gcc/testsuite/gcc.target/arm/pr68143_1.c
>> new file mode 100644
>> index 0000000..323473f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c
>> @@ -0,0 +1,36 @@
>> +/* { dg-do run } */
>> +/* { dg-require-effective-target arm_neon_hw } */
>> +/* { dg-options "-O3 -mcpu=cortex-a57" } */
>> +/* { dg-add-options arm_neon } */
>> +
>> +#define NULL 0
>> +
>> +struct stuff
>> +{
>> +    int a;
>> +    int b;
>> +    int c;
>> +    int d;
>> +    int e;
>> +    char *f;
>> +    int g;
>> +};
>> +
>> +void __attribute__ ((noinline))
>> +bar (struct stuff *x)
>> +{
>> +  if (x->g != 2)
>> +    __builtin_abort ();
>> +}
>> +
>> +int
>> +main (int argc, char** argv)
>> +{
>> +  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
>> +  x.a = 100;
>> +  x.d = 100;
>> +  x.g = 2;
>> +  /* Struct should now look like {100, 0, 0, 100, 0, 0, 0, 2}.  */
>> +  bar (&x);
>> +  return 0;
>> +}
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem
  2015-11-17 12:58   ` Kyrill Tkachov
@ 2015-11-17 13:22     ` Kyrill Tkachov
  0 siblings, 0 replies; 6+ messages in thread
From: Kyrill Tkachov @ 2015-11-17 13:22 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: Ramana Radhakrishnan, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 9435 bytes --]


On 17/11/15 12:58, Kyrill Tkachov wrote:
> Hi Ramana,
>
> On 17/11/15 12:02, Ramana Radhakrishnan wrote:
>>
>> On 06/11/15 10:46, Kyrill Tkachov wrote:
>>> Hi all,
>>>
>>> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
>>> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
>>> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
>>> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
>>>
>>> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
>>>
>>> This patch fixes the problem by keeping track of the offset to which stores are emitted and
>>> passing it to adjust_automodify_address as appropriate.
>>>
>>> From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
>>> fix in that function as well.
>>>
>>> Bootstrapped and tested on arm-none-linux-gnueabihf.
>>>
>>> Ok for trunk?
>>>
>>> This bug appears on GCC 5 too and I'm currently testing this patch there.
>>> Ok to backport to GCC 5 as well?
>>> Thanks,
>>> Kyrill
>>>
>>> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>>
>>>      PR target/68143
>>>      * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>>>      offset from dstbase and use it appropriately in
>>>      adjust_automodify_address.
>>>      (arm_block_set_aligned_vect): Likewise.
>>>
>>> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>>
>>>      PR target/68143
>>>      * gcc.target/arm/pr68143_1.c: New test.
>> Sorry about the delay in reviewing this. There's nothing ARM-specific about this test, so I'd just put it in gcc.c-torture/execute; there are enough auto-testers with NEON enabled that will show up issues if this starts failing.
>
> Thanks, will do. I was on the fence about whether this should go in torture.
> I'll put it there.
>

For the record, here's what I committed with r230462.

2015-11-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR target/68143
     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
     offset from dstbase and use it appropriately in
     adjust_automodify_address.
     (arm_block_set_aligned_vect): Likewise.

2015-11-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR target/68143
     * gcc.c-torture/execute/pr68143_1.c: New test.

> Kyrill
>
>>
>> Ok with that change.
>>
>> Ramana
>>
>>> arm-setmem-offset.patch
>>>
>>>
>>> commit 78c6989a7af1df672ea227057180d79d717ed5f3
>>> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
>>> Date:   Wed Oct 28 17:29:18 2015 +0000
>>>
>>>      [ARM] Properly update memory offsets when expanding setmem
>>>
>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>> index 66e8afc..adf3143 100644
>>> --- a/gcc/config/arm/arm.c
>>> +++ b/gcc/config/arm/arm.c
>>> @@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>>     rtx (*gen_func) (rtx, rtx);
>>>     machine_mode mode;
>>>     unsigned HOST_WIDE_INT v = value;
>>> -
>>> +  unsigned int offset = 0;
>>>     gcc_assert ((align & 0x3) != 0);
>>>     nelt_v8 = GET_MODE_NUNITS (V8QImode);
>>>     nelt_v16 = GET_MODE_NUNITS (V16QImode);
>>> @@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>>       return false;
>>>       dst = copy_addr_to_reg (XEXP (dstbase, 0));
>>> -  mem = adjust_automodify_address (dstbase, mode, dst, 0);
>>> +  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>>       v = sext_hwi (v, BITS_PER_WORD);
>>>     val_elt = GEN_INT (v);
>>> @@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>>       {
>>>         emit_insn ((*gen_func) (mem, reg));
>>>         if (i + 2 * nelt_mode <= length)
>>> -    emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
>>> +    {
>>> +      emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
>>> +      offset += nelt_mode;
>>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>> +    }
>>>       }
>>>       /* If there are not less than nelt_v8 bytes leftover, we must be in
>>> @@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>>     if (i + nelt_v8 < length)
>>>       {
>>>         emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
>>> +      offset += length - i;
>>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>> +
>>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>>         if ((length & 1) != 0 && align >= 2)
>>>       set_mem_align (mem, BITS_PER_UNIT);
>>> @@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
>>>     else if (i < length && i + nelt_v8 >= length)
>>>       {
>>>         if (mode == V16QImode)
>>> -    {
>>> -      reg = gen_lowpart (V8QImode, reg);
>>> -      mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
>>> -    }
>>> +    reg = gen_lowpart (V8QImode, reg);
>>> +
>>>         emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
>>>                             + (nelt_mode - nelt_v8))));
>>> +      offset += (length - i) + (nelt_mode - nelt_v8);
>>> +      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
>>> +
>>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>>         if ((length & 1) != 0 && align >= 2)
>>>       set_mem_align (mem, BITS_PER_UNIT);
>>> @@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>>>     rtx rval[MAX_VECT_LEN];
>>>     machine_mode mode;
>>>     unsigned HOST_WIDE_INT v = value;
>>> +  unsigned int offset = 0;
>>>       gcc_assert ((align & 0x3) == 0);
>>>     nelt_v8 = GET_MODE_NUNITS (V8QImode);
>>> @@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase,
>>>     /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
>>>     if (mode == V16QImode)
>>>       {
>>> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
>>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>>         emit_insn (gen_movmisalignv16qi (mem, reg));
>>>         i += nelt_mode;
>>>         /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
>>>         if (i + nelt_v8 < length && i + nelt_v16 > length)
>>>       {
>>>         emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
>>> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
>>> +      offset += length - nelt_mode;
>>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>>         if ((length & 0x3) == 0)
>>>           set_mem_align (mem, BITS_PER_UNIT * 4);
>>> @@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>>>     for (; (i + nelt_mode <= length); i += nelt_mode)
>>>       {
>>>         addr = plus_constant (Pmode, dst, i);
>>> -      mem = adjust_automodify_address (dstbase, mode, addr, i);
>>> +      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
>>>         emit_move_insn (mem, reg);
>>>       }
>>>   @@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>>>     if (i + UNITS_PER_WORD == length)
>>>       {
>>>         addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
>>> -      mem = adjust_automodify_address (dstbase, mode,
>>> -                       addr, i - UNITS_PER_WORD);
>>> +      offset += i - UNITS_PER_WORD;
>>> +      mem = adjust_automodify_address (dstbase, mode, addr, offset);
>>>         /* We are shifting 4 bytes back, set the alignment accordingly.  */
>>>         if (align > UNITS_PER_WORD)
>>>       set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
>>> @@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>>>     else if (i < length)
>>>       {
>>>         emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
>>> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
>>> +      offset += length - nelt_mode;
>>> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>>>         /* We are shifting bytes back, set the alignment accordingly.  */
>>>         if ((length & 1) == 0)
>>>       set_mem_align (mem, BITS_PER_UNIT * 2);
>>> diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c b/gcc/testsuite/gcc.target/arm/pr68143_1.c
>>> new file mode 100644
>>> index 0000000..323473f
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c
>>> @@ -0,0 +1,36 @@
>>> +/* { dg-do run } */
>>> +/* { dg-require-effective-target arm_neon_hw } */
>>> +/* { dg-options "-O3 -mcpu=cortex-a57" } */
>>> +/* { dg-add-options arm_neon } */
>>> +
>>> +#define NULL 0
>>> +
>>> +struct stuff
>>> +{
>>> +    int a;
>>> +    int b;
>>> +    int c;
>>> +    int d;
>>> +    int e;
>>> +    char *f;
>>> +    int g;
>>> +};
>>> +
>>> +void __attribute__ ((noinline))
>>> +bar (struct stuff *x)
>>> +{
>>> +  if (x->g != 2)
>>> +    __builtin_abort ();
>>> +}
>>> +
>>> +int
>>> +main (int argc, char** argv)
>>> +{
>>> +  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
>>> +  x.a = 100;
>>> +  x.d = 100;
>>> +  x.g = 2;
>>> +  /* Struct should now look like {100, 0, 0, 100, 0, 0, 0, 2}.  */
>>> +  bar (&x);
>>> +  return 0;
>>> +}
>>>
>


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-setmem-offset.patch --]
[-- Type: text/x-patch; name=arm-setmem-offset.patch, Size: 5902 bytes --]

commit 7f329a2f9c3efdb5e7a6483792fcfab945cc7a84
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Oct 28 17:29:18 2015 +0000

    [ARM] Properly update memory offsets when expanding setmem

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d0fe028..8a92798 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -29171,7 +29171,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
   rtx (*gen_func) (rtx, rtx);
   machine_mode mode;
   unsigned HOST_WIDE_INT v = value;
-
+  unsigned int offset = 0;
   gcc_assert ((align & 0x3) != 0);
   nelt_v8 = GET_MODE_NUNITS (V8QImode);
   nelt_v16 = GET_MODE_NUNITS (V16QImode);
@@ -29192,7 +29192,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
     return false;
 
   dst = copy_addr_to_reg (XEXP (dstbase, 0));
-  mem = adjust_automodify_address (dstbase, mode, dst, 0);
+  mem = adjust_automodify_address (dstbase, mode, dst, offset);
 
   v = sext_hwi (v, BITS_PER_WORD);
   val_elt = GEN_INT (v);
@@ -29209,7 +29209,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
     {
       emit_insn ((*gen_func) (mem, reg));
       if (i + 2 * nelt_mode <= length)
-	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
+	{
+	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
+	  offset += nelt_mode;
+	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
+	}
     }
 
   /* If there are not less than nelt_v8 bytes leftover, we must be in
@@ -29220,6 +29224,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
   if (i + nelt_v8 < length)
     {
       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
+      offset += length - i;
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
+
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) != 0 && align >= 2)
 	set_mem_align (mem, BITS_PER_UNIT);
@@ -29230,12 +29237,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
   else if (i < length && i + nelt_v8 >= length)
     {
       if (mode == V16QImode)
-	{
-	  reg = gen_lowpart (V8QImode, reg);
-	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
-	}
+	reg = gen_lowpart (V8QImode, reg);
+
       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
 					      + (nelt_mode - nelt_v8))));
+      offset += (length - i) + (nelt_mode - nelt_v8);
+      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
+
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) != 0 && align >= 2)
 	set_mem_align (mem, BITS_PER_UNIT);
@@ -29262,6 +29270,7 @@ arm_block_set_aligned_vect (rtx dstbase,
   rtx rval[MAX_VECT_LEN];
   machine_mode mode;
   unsigned HOST_WIDE_INT v = value;
+  unsigned int offset = 0;
 
   gcc_assert ((align & 0x3) == 0);
   nelt_v8 = GET_MODE_NUNITS (V8QImode);
@@ -29293,14 +29302,15 @@ arm_block_set_aligned_vect (rtx dstbase,
   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
   if (mode == V16QImode)
     {
-      mem = adjust_automodify_address (dstbase, mode, dst, 0);
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
       emit_insn (gen_movmisalignv16qi (mem, reg));
       i += nelt_mode;
       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
       if (i + nelt_v8 < length && i + nelt_v16 > length)
 	{
 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
-	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
+	  offset += length - nelt_mode;
+	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
 	  /* We are shifting bytes back, set the alignment accordingly.  */
 	  if ((length & 0x3) == 0)
 	    set_mem_align (mem, BITS_PER_UNIT * 4);
@@ -29322,7 +29332,7 @@ arm_block_set_aligned_vect (rtx dstbase,
   for (; (i + nelt_mode <= length); i += nelt_mode)
     {
       addr = plus_constant (Pmode, dst, i);
-      mem = adjust_automodify_address (dstbase, mode, addr, i);
+      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
       emit_move_insn (mem, reg);
     }
 
@@ -29331,8 +29341,8 @@ arm_block_set_aligned_vect (rtx dstbase,
   if (i + UNITS_PER_WORD == length)
     {
       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
-      mem = adjust_automodify_address (dstbase, mode,
-				       addr, i - UNITS_PER_WORD);
+      offset += i - UNITS_PER_WORD;
+      mem = adjust_automodify_address (dstbase, mode, addr, offset);
       /* We are shifting 4 bytes back, set the alignment accordingly.  */
       if (align > UNITS_PER_WORD)
 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
@@ -29344,7 +29354,8 @@ arm_block_set_aligned_vect (rtx dstbase,
   else if (i < length)
     {
       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
-      mem = adjust_automodify_address (dstbase, mode, dst, 0);
+      offset += length - nelt_mode;
+      mem = adjust_automodify_address (dstbase, mode, dst, offset);
       /* We are shifting bytes back, set the alignment accordingly.  */
       if ((length & 1) == 0)
 	set_mem_align (mem, BITS_PER_UNIT * 2);
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr68143_1.c b/gcc/testsuite/gcc.c-torture/execute/pr68143_1.c
new file mode 100644
index 0000000..cbfbbc2
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr68143_1.c
@@ -0,0 +1,31 @@
+#define NULL 0
+
+struct stuff
+{
+    int a;
+    int b;
+    int c;
+    int d;
+    int e;
+    char *f;
+    int g;
+};
+
+void __attribute__ ((noinline))
+bar (struct stuff *x)
+{
+  if (x->g != 2)
+    __builtin_abort ();
+}
+
+int
+main (int argc, char** argv)
+{
+  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
+  x.a = 100;
+  x.d = 100;
+  x.g = 2;
+  /* Struct should now look like {100, 0, 0, 100, 0, 0, 2}.  */
+  bar (&x);
+  return 0;
+}

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-11-17 13:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-06 10:46 [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem Kyrill Tkachov
2015-11-09  9:27 ` Kyrill Tkachov
2015-11-13 11:52 ` Kyrill Tkachov
2015-11-17 12:02 ` Ramana Radhakrishnan
2015-11-17 12:58   ` Kyrill Tkachov
2015-11-17 13:22     ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).