public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix gimple store merging (PR tree-optimization/78436)
@ 2016-11-21 19:15 Jakub Jelinek
  2016-11-22  7:58 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-11-21 19:15 UTC (permalink / raw)
  To: Kyrill Tkachov, Richard Biener; +Cc: gcc-patches

Hi!

The
   if (!BYTES_BIG_ENDIAN)
-    shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+    {
+      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+      if (shift_amnt == 0)
+       byte_size--;
+    }
hunk below is the actual fix for the PR, where we originally store:
8-bit 0 at offset 24-bits followed by 24-bit negative value at offset 0,
little endian.  encode_tree_to_bitpos actually allocates 1 extra byte in the
buffer and byte_size is also 1 byte longer, for the case where the
bits need to be shifted (it only cares about shifts within bytes, so 0 to
BITS_PER_UNIT - 1).  If no shifting is done and there is no padding, we are
also fine, because native_encode_expr will only actually write the size of
TYPE_MODE bytes.  But in this case padding is 1 byte, so native_encode_expr
writes 4 bytes (the last one is 0xff), byte_size is initially 5, as padding
is 1, it is decremented to 4.  But we actually want to store just 3 bytes,
not 4; when we store 4, we overwrite the earlier value of the following
byte.

The rest of the patch are just cleanups.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-11-21  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/78436
	* gimple-ssa-store-merging.c (zero_char_buf): Removed.
	(shift_bytes_in_array, shift_bytes_in_array_right,
	merged_store_group::apply_stores): Formatting fixes.
	(clear_bit_region): Likewise.  Use memset.
	(encode_tree_to_bitpos): Formatting fixes.  Fix comment typos - EPXR
	instead of EXPR and inerted instead of inserted.  Use memset instead
	of zero_char_buf.  For !BYTES_BIG_ENDIAN decrease byte_size by 1
	if shift_amnt is 0.

	* gcc.c-torture/execute/pr78436.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj	2016-11-09 15:22:36.000000000 +0100
+++ gcc/gimple-ssa-store-merging.c	2016-11-21 10:54:51.746090238 +0100
@@ -199,17 +199,6 @@ dump_char_array (FILE *fd, unsigned char
   fprintf (fd, "\n");
 }
 
-/* Fill a byte array PTR of SZ elements with zeroes.  This is to be used by
-   encode_tree_to_bitpos to zero-initialize most likely small arrays but
-   with a non-compile-time-constant size.  */
-
-static inline void
-zero_char_buf (unsigned char *ptr, unsigned int sz)
-{
-  for (unsigned int i = 0; i < sz; i++)
-    ptr[i] = 0;
-}
-
 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
    bits between adjacent elements.  AMNT should be within
    [0, BITS_PER_UNIT).
@@ -224,14 +213,13 @@ shift_bytes_in_array (unsigned char *ptr
     return;
 
   unsigned char carry_over = 0U;
-  unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt));
+  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
   unsigned char clear_mask = (~0U) << amnt;
 
   for (unsigned int i = 0; i < sz; i++)
     {
       unsigned prev_carry_over = carry_over;
-      carry_over
-	= (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
 
       ptr[i] <<= amnt;
       if (i != 0)
@@ -263,10 +251,9 @@ shift_bytes_in_array_right (unsigned cha
   for (unsigned int i = 0; i < sz; i++)
     {
       unsigned prev_carry_over = carry_over;
-      carry_over
-	= (ptr[i] & carry_mask);
+      carry_over = ptr[i] & carry_mask;
 
-     carry_over <<= ((unsigned char)BITS_PER_UNIT - amnt);
+     carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      ptr[i] |= prev_carry_over;
     }
@@ -327,7 +314,7 @@ clear_bit_region (unsigned char *ptr, un
   /* Second base case.  */
   else if ((start + len) <= BITS_PER_UNIT)
     {
-      unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len));
+      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
       mask >>= BITS_PER_UNIT - (start + len);
 
       ptr[0] &= ~mask;
@@ -346,8 +333,7 @@ clear_bit_region (unsigned char *ptr, un
       unsigned int nbytes = len / BITS_PER_UNIT;
       /* We could recurse on each byte but do the loop here to avoid
 	 recursing too deep.  */
-      for (unsigned int i = 0; i < nbytes; i++)
-	ptr[i] = 0U;
+      memset (ptr, '\0', nbytes);
       /* Clear the remaining sub-byte region if there is one.  */
       if (len % BITS_PER_UNIT != 0)
 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
@@ -362,7 +348,7 @@ clear_bit_region (unsigned char *ptr, un
 
 static bool
 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
-			unsigned int total_bytes)
+		       unsigned int total_bytes)
 {
   unsigned int first_byte = bitpos / BITS_PER_UNIT;
   tree tmp_int = expr;
@@ -370,8 +356,8 @@ encode_tree_to_bitpos (tree expr, unsign
 			|| mode_for_size (bitlen, MODE_INT, 0) == BLKmode;
 
   if (!sub_byte_op_p)
-    return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
-	   != 0;
+    return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
+	    != 0);
 
   /* LITTLE-ENDIAN
      We are writing a non byte-sized quantity or at a position that is not
@@ -381,7 +367,7 @@ encode_tree_to_bitpos (tree expr, unsign
            xxx xxxxxxxx xxx< bp>
            |______EXPR____|
 
-     First native_encode_expr EPXR into a temporary buffer and shift each
+     First native_encode_expr EXPR into a temporary buffer and shift each
      byte in the buffer by 'bp' (carrying the bits over as necessary).
      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
                                               <------bitlen---->< bp>
@@ -400,7 +386,7 @@ encode_tree_to_bitpos (tree expr, unsign
                        <bp >xxx xxxxxxxx xxx
                             |_____EXPR_____|
 
-     First native_encode_expr EPXR into a temporary buffer and shift each
+     First native_encode_expr EXPR into a temporary buffer and shift each
      byte in the buffer to the right by (carrying the bits over as necessary).
      We shift by as much as needed to align the most significant bit of EXPR
      with bitpos:
@@ -418,7 +404,7 @@ encode_tree_to_bitpos (tree expr, unsign
   /* Allocate an extra byte so that we have space to shift into.  */
   unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
-  zero_char_buf (tmpbuf, byte_size);
+  memset (tmpbuf, '\0', byte_size);
   /* The store detection code should only have allowed constants that are
      accepted by native_encode_expr.  */
   if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0)
@@ -453,7 +439,7 @@ encode_tree_to_bitpos (tree expr, unsign
     }
 
   /* Clear the bit region in PTR where the bits from TMPBUF will be
-     inerted into.  */
+     inserted into.  */
   if (BYTES_BIG_ENDIAN)
     clear_bit_region_be (ptr + first_byte,
 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
@@ -493,7 +479,11 @@ encode_tree_to_bitpos (tree expr, unsign
 
   /* Create the shifted version of EXPR.  */
   if (!BYTES_BIG_ENDIAN)
-    shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+    {
+      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+      if (shift_amnt == 0)
+	byte_size--;
+    }
   else
     {
       gcc_assert (BYTES_BIG_ENDIAN);
@@ -648,8 +638,7 @@ merged_store_group::apply_stores ()
   /* Create a buffer of a size that is 2 times the number of bytes we're
      storing.  That way native_encode_expr can write power-of-2-sized
      chunks without overrunning.  */
-  buf_size
-    = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
+  buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
   val = XCNEWVEC (unsigned char, buf_size);
 
   FOR_EACH_VEC_ELT (stores, i, info)
--- gcc/testsuite/gcc.c-torture/execute/pr78436.c.jj	2016-11-21 10:58:28.209378756 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr78436.c	2016-11-21 10:57:45.000000000 +0100
@@ -0,0 +1,23 @@
+/* PR tree-optimization/78436 */
+
+struct S
+{
+  long int a : 24;
+  signed char b : 8;
+} s;
+
+__attribute__((noinline, noclone)) void
+foo ()
+{
+  s.b = 0;
+  s.a = -1193165L;
+}
+
+int
+main ()
+{
+  foo ();
+  if (s.b != 0)
+    __builtin_abort ();
+  return 0;
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix gimple store merging (PR tree-optimization/78436)
  2016-11-21 19:15 [PATCH] Fix gimple store merging (PR tree-optimization/78436) Jakub Jelinek
@ 2016-11-22  7:58 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2016-11-22  7:58 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Kyrill Tkachov, gcc-patches

On Mon, 21 Nov 2016, Jakub Jelinek wrote:

> Hi!
> 
> The
>    if (!BYTES_BIG_ENDIAN)
> -    shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> +    {
> +      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> +      if (shift_amnt == 0)
> +       byte_size--;
> +    }
> hunk below is the actual fix for the PR, where we originally store:
> 8-bit 0 at offset 24-bits followed by 24-bit negative value at offset 0,
> little endian.  encode_tree_to_bitpos actually allocates 1 extra byte in the
> buffer and byte_size is also 1 byte longer, for the case where the
> bits need to be shifted (it only cares about shifts within bytes, so 0 to
> BITS_PER_UNIT - 1).  If no shifting is done and there is no padding, we are
> also fine, because native_encode_expr will only actually write the size of
> TYPE_MODE bytes.  But in this case padding is 1 byte, so native_encode_expr
> writes 4 bytes (the last one is 0xff), byte_size is initially 5, as padding
> is 1, it is decremented to 4.  But we actually want to store just 3 bytes,
> not 4; when we store 4, we overwrite the earlier value of the following
> byte.
> 
> The rest of the patch are just cleanups.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard.

> 2016-11-21  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/78436
> 	* gimple-ssa-store-merging.c (zero_char_buf): Removed.
> 	(shift_bytes_in_array, shift_bytes_in_array_right,
> 	merged_store_group::apply_stores): Formatting fixes.
> 	(clear_bit_region): Likewise.  Use memset.
> 	(encode_tree_to_bitpos): Formatting fixes.  Fix comment typos - EPXR
> 	instead of EXPR and inerted instead of inserted.  Use memset instead
> 	of zero_char_buf.  For !BYTES_BIG_ENDIAN decrease byte_size by 1
> 	if shift_amnt is 0.
> 
> 	* gcc.c-torture/execute/pr78436.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj	2016-11-09 15:22:36.000000000 +0100
> +++ gcc/gimple-ssa-store-merging.c	2016-11-21 10:54:51.746090238 +0100
> @@ -199,17 +199,6 @@ dump_char_array (FILE *fd, unsigned char
>    fprintf (fd, "\n");
>  }
>  
> -/* Fill a byte array PTR of SZ elements with zeroes.  This is to be used by
> -   encode_tree_to_bitpos to zero-initialize most likely small arrays but
> -   with a non-compile-time-constant size.  */
> -
> -static inline void
> -zero_char_buf (unsigned char *ptr, unsigned int sz)
> -{
> -  for (unsigned int i = 0; i < sz; i++)
> -    ptr[i] = 0;
> -}
> -
>  /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
>     bits between adjacent elements.  AMNT should be within
>     [0, BITS_PER_UNIT).
> @@ -224,14 +213,13 @@ shift_bytes_in_array (unsigned char *ptr
>      return;
>  
>    unsigned char carry_over = 0U;
> -  unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt));
> +  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
>    unsigned char clear_mask = (~0U) << amnt;
>  
>    for (unsigned int i = 0; i < sz; i++)
>      {
>        unsigned prev_carry_over = carry_over;
> -      carry_over
> -	= (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
> +      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
>  
>        ptr[i] <<= amnt;
>        if (i != 0)
> @@ -263,10 +251,9 @@ shift_bytes_in_array_right (unsigned cha
>    for (unsigned int i = 0; i < sz; i++)
>      {
>        unsigned prev_carry_over = carry_over;
> -      carry_over
> -	= (ptr[i] & carry_mask);
> +      carry_over = ptr[i] & carry_mask;
>  
> -     carry_over <<= ((unsigned char)BITS_PER_UNIT - amnt);
> +     carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
>       ptr[i] >>= amnt;
>       ptr[i] |= prev_carry_over;
>      }
> @@ -327,7 +314,7 @@ clear_bit_region (unsigned char *ptr, un
>    /* Second base case.  */
>    else if ((start + len) <= BITS_PER_UNIT)
>      {
> -      unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len));
> +      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
>        mask >>= BITS_PER_UNIT - (start + len);
>  
>        ptr[0] &= ~mask;
> @@ -346,8 +333,7 @@ clear_bit_region (unsigned char *ptr, un
>        unsigned int nbytes = len / BITS_PER_UNIT;
>        /* We could recurse on each byte but do the loop here to avoid
>  	 recursing too deep.  */
> -      for (unsigned int i = 0; i < nbytes; i++)
> -	ptr[i] = 0U;
> +      memset (ptr, '\0', nbytes);
>        /* Clear the remaining sub-byte region if there is one.  */
>        if (len % BITS_PER_UNIT != 0)
>  	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
> @@ -362,7 +348,7 @@ clear_bit_region (unsigned char *ptr, un
>  
>  static bool
>  encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
> -			unsigned int total_bytes)
> +		       unsigned int total_bytes)
>  {
>    unsigned int first_byte = bitpos / BITS_PER_UNIT;
>    tree tmp_int = expr;
> @@ -370,8 +356,8 @@ encode_tree_to_bitpos (tree expr, unsign
>  			|| mode_for_size (bitlen, MODE_INT, 0) == BLKmode;
>  
>    if (!sub_byte_op_p)
> -    return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
> -	   != 0;
> +    return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
> +	    != 0);
>  
>    /* LITTLE-ENDIAN
>       We are writing a non byte-sized quantity or at a position that is not
> @@ -381,7 +367,7 @@ encode_tree_to_bitpos (tree expr, unsign
>             xxx xxxxxxxx xxx< bp>
>             |______EXPR____|
>  
> -     First native_encode_expr EPXR into a temporary buffer and shift each
> +     First native_encode_expr EXPR into a temporary buffer and shift each
>       byte in the buffer by 'bp' (carrying the bits over as necessary).
>       |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
>                                                <------bitlen---->< bp>
> @@ -400,7 +386,7 @@ encode_tree_to_bitpos (tree expr, unsign
>                         <bp >xxx xxxxxxxx xxx
>                              |_____EXPR_____|
>  
> -     First native_encode_expr EPXR into a temporary buffer and shift each
> +     First native_encode_expr EXPR into a temporary buffer and shift each
>       byte in the buffer to the right by (carrying the bits over as necessary).
>       We shift by as much as needed to align the most significant bit of EXPR
>       with bitpos:
> @@ -418,7 +404,7 @@ encode_tree_to_bitpos (tree expr, unsign
>    /* Allocate an extra byte so that we have space to shift into.  */
>    unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
>    unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
> -  zero_char_buf (tmpbuf, byte_size);
> +  memset (tmpbuf, '\0', byte_size);
>    /* The store detection code should only have allowed constants that are
>       accepted by native_encode_expr.  */
>    if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0)
> @@ -453,7 +439,7 @@ encode_tree_to_bitpos (tree expr, unsign
>      }
>  
>    /* Clear the bit region in PTR where the bits from TMPBUF will be
> -     inerted into.  */
> +     inserted into.  */
>    if (BYTES_BIG_ENDIAN)
>      clear_bit_region_be (ptr + first_byte,
>  			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
> @@ -493,7 +479,11 @@ encode_tree_to_bitpos (tree expr, unsign
>  
>    /* Create the shifted version of EXPR.  */
>    if (!BYTES_BIG_ENDIAN)
> -    shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> +    {
> +      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> +      if (shift_amnt == 0)
> +	byte_size--;
> +    }
>    else
>      {
>        gcc_assert (BYTES_BIG_ENDIAN);
> @@ -648,8 +638,7 @@ merged_store_group::apply_stores ()
>    /* Create a buffer of a size that is 2 times the number of bytes we're
>       storing.  That way native_encode_expr can write power-of-2-sized
>       chunks without overrunning.  */
> -  buf_size
> -    = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
> +  buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
>    val = XCNEWVEC (unsigned char, buf_size);
>  
>    FOR_EACH_VEC_ELT (stores, i, info)
> --- gcc/testsuite/gcc.c-torture/execute/pr78436.c.jj	2016-11-21 10:58:28.209378756 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr78436.c	2016-11-21 10:57:45.000000000 +0100
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/78436 */
> +
> +struct S
> +{
> +  long int a : 24;
> +  signed char b : 8;
> +} s;
> +
> +__attribute__((noinline, noclone)) void
> +foo ()
> +{
> +  s.b = 0;
> +  s.a = -1193165L;
> +}
> +
> +int
> +main ()
> +{
> +  foo ();
> +  if (s.b != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-11-22  7:58 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-21 19:15 [PATCH] Fix gimple store merging (PR tree-optimization/78436) Jakub Jelinek
2016-11-22  7:58 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).