public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] middle-end: Prefer no RMW in __builtin_clear_padding implementation where possible
@ 2020-11-24  8:15 Jakub Jelinek
  2020-11-24  9:32 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2020-11-24  8:15 UTC (permalink / raw)
  To: Richard Biener, Jason Merrill; +Cc: gcc-patches

Hi!

Currently the __builtin_clear_padding expansion code emits no code for
full words that don't have any padding bits, and most of the time if
the only padding bytes are from the start of the word it attempts to merge
them with previous padding store (via {}) or if the only padding bytes are
from the end of the word, it attempts to merge it with following padding
bytes.  For everything else it was using a RMW, except when it found
an aligned char/short/int covering all the padding bytes and all those
padding bytes were all ones in that store.

The following patch changes it, such that we only use RMW if the padding has
any bytes which have some padding and some non-padding bits (i.e. bitfields
are involved), often it is the same amount of instructions in the end and
avoids being thread-unsafe unless necessary (and avoids having to wait for
the reads to make it into the CPU).  So, if there are no bitfields,
the function will just store some zero bytes, shorts, ints, long longs etc.
where needed.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-11-24  Jakub Jelinek  <jakub@redhat.com>

	* gimple-fold.c (clear_padding_flush): If a word contains only 0
	or 0xff bytes of padding other than all set, all clear, all set
	followed by all clear or all clear followed by all set, don't emit
	a RMW operation on the whole word or parts of it, but instead
	clear the individual bytes of padding.  For paddings of one byte
	size, don't use char[1] and {}, but instead just char and 0.

--- gcc/gimple-fold.c.jj	2020-11-23 11:59:18.790279409 +0100
+++ gcc/gimple-fold.c	2020-11-23 15:48:36.723510615 +0100
@@ -4033,7 +4033,9 @@ clear_padding_flush (clear_padding_struc
     {
       size_t nonzero_first = wordsize;
       size_t nonzero_last = 0;
-      bool all_ones = true;
+      size_t zero_first = wordsize;
+      size_t zero_last = 0;
+      bool all_ones = true, bytes_only = true;
       if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize)
 	  > (unsigned HOST_WIDE_INT) buf->sz)
 	{
@@ -4055,9 +4057,19 @@ clear_padding_flush (clear_padding_struc
 		all_ones = false;
 	      nonzero_last = j + 1 - i;
 	    }
+	  else
+	    {
+	      if (zero_first == wordsize)
+		zero_first = j - i;
+	      zero_last = j + 1 - i;
+	    }
 	  if (buf->buf[j] != 0 && buf->buf[j] != (unsigned char) ~0)
-	    all_ones = false;
+	    {
+	      all_ones = false;
+	      bytes_only = false;
+	    }
 	}
+      size_t padding_end = i;
       if (padding_bytes)
 	{
 	  if (nonzero_first == 0
@@ -4069,7 +4081,6 @@ clear_padding_flush (clear_padding_struc
 	      padding_bytes += wordsize;
 	      continue;
 	    }
-	  size_t padding_end = i;
 	  if (all_ones && nonzero_first == 0)
 	    {
 	      padding_bytes += nonzero_last;
@@ -4077,12 +4088,27 @@ clear_padding_flush (clear_padding_struc
 	      nonzero_first = wordsize;
 	      nonzero_last = 0;
 	    }
-	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
+	  else if (bytes_only && nonzero_first == 0)
+	    {
+	      gcc_assert (zero_first && zero_first != wordsize);
+	      padding_bytes += zero_first;
+	      padding_end += zero_first;
+	    }
+	  tree atype, src;
+	  if (padding_bytes == 1)
+	    {
+	      atype = char_type_node;
+	      src = build_zero_cst (char_type_node);
+	    }
+	  else
+	    {
+	      atype = build_array_type_nelts (char_type_node, padding_bytes);
+	      src = build_constructor (atype, NULL);
+	    }
 	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
 				 build_int_cst (buf->alias_type,
 						buf->off + padding_end
 						- padding_bytes));
-	  tree src = build_constructor (atype, NULL);
 	  gimple *g = gimple_build_assign (dst, src);
 	  gimple_set_location (g, buf->loc);
 	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
@@ -4099,6 +4125,45 @@ clear_padding_flush (clear_padding_struc
 	  padding_bytes = nonzero_last - nonzero_first;
 	  continue;
 	}
+      if (bytes_only)
+	{
+	  /* If bitfields aren't involved in this word, prefer storing
+	     individual bytes or groups of them over performing a RMW
+	     operation on the whole word.  */
+	  gcc_assert (i + zero_last <= end);
+	  for (size_t j = padding_end; j < i + zero_last; j++)
+	    {
+	      if (buf->buf[j])
+		{
+		  size_t k;
+		  for (k = j; k < i + zero_last; k++)
+		    if (buf->buf[k] == 0)
+		      break;
+		  HOST_WIDE_INT off = buf->off + j;
+		  tree atype, src;
+		  if (k - j == 1)
+		    {
+		      atype = char_type_node;
+		      src = build_zero_cst (char_type_node);
+		    }
+		  else
+		    {
+		      atype = build_array_type_nelts (char_type_node, k - j);
+		      src = build_constructor (atype, NULL);
+		    }
+		  tree dst = build2_loc (buf->loc, MEM_REF, atype,
+					 buf->base,
+					 build_int_cst (buf->alias_type, off));
+		  gimple *g = gimple_build_assign (dst, src);
+		  gimple_set_location (g, buf->loc);
+		  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
+		  j = k;
+		}
+	    }
+	  if (nonzero_last == wordsize)
+	    padding_bytes = nonzero_last - zero_last;
+	  continue;
+	}
       for (size_t eltsz = 1; eltsz <= wordsize; eltsz <<= 1)
 	{
 	  if (nonzero_last - nonzero_first <= eltsz
@@ -4153,12 +4218,21 @@ clear_padding_flush (clear_padding_struc
     {
       if (padding_bytes)
 	{
-	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
+	  tree atype, src;
+	  if (padding_bytes == 1)
+	    {
+	      atype = char_type_node;
+	      src = build_zero_cst (char_type_node);
+	    }
+	  else
+	    {
+	      atype = build_array_type_nelts (char_type_node, padding_bytes);
+	      src = build_constructor (atype, NULL);
+	    }
 	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
 				 build_int_cst (buf->alias_type,
 						buf->off + end
 						- padding_bytes));
-	  tree src = build_constructor (atype, NULL);
 	  gimple *g = gimple_build_assign (dst, src);
 	  gimple_set_location (g, buf->loc);
 	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] middle-end: Prefer no RMW in __builtin_clear_padding implementation where possible
  2020-11-24  8:15 [PATCH] middle-end: Prefer no RMW in __builtin_clear_padding implementation where possible Jakub Jelinek
@ 2020-11-24  9:32 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2020-11-24  9:32 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jason Merrill, gcc-patches

On Tue, 24 Nov 2020, Jakub Jelinek wrote:

> Hi!
> 
> Currently the __builtin_clear_padding expansion code emits no code for
> full words that don't have any padding bits, and most of the time if
> the only padding bytes are from the start of the word it attempts to merge
> them with previous padding store (via {}) or if the only padding bytes are
> from the end of the word, it attempts to merge it with following padding
> bytes.  For everything else it was using a RMW, except when it found
> an aligned char/short/int covering all the padding bytes and all those
> padding bytes were all ones in that store.
> 
> The following patch changes it, such that we only use RMW if the padding has
> any bytes which have some padding and some non-padding bits (i.e. bitfields
> are involved), often it is the same amount of instructions in the end and
> avoids being thread-unsafe unless necessary (and avoids having to wait for
> the reads to make it into the CPU).  So, if there are no bitfields,
> the function will just store some zero bytes, shorts, ints, long longs etc.
> where needed.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2020-11-24  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* gimple-fold.c (clear_padding_flush): If a word contains only 0
> 	or 0xff bytes of padding other than all set, all clear, all set
> 	followed by all clear or all clear followed by all set, don't emit
> 	a RMW operation on the whole word or parts of it, but instead
> 	clear the individual bytes of padding.  For paddings of one byte
> 	size, don't use char[1] and {}, but instead just char and 0.
> 
> --- gcc/gimple-fold.c.jj	2020-11-23 11:59:18.790279409 +0100
> +++ gcc/gimple-fold.c	2020-11-23 15:48:36.723510615 +0100
> @@ -4033,7 +4033,9 @@ clear_padding_flush (clear_padding_struc
>      {
>        size_t nonzero_first = wordsize;
>        size_t nonzero_last = 0;
> -      bool all_ones = true;
> +      size_t zero_first = wordsize;
> +      size_t zero_last = 0;
> +      bool all_ones = true, bytes_only = true;
>        if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize)
>  	  > (unsigned HOST_WIDE_INT) buf->sz)
>  	{
> @@ -4055,9 +4057,19 @@ clear_padding_flush (clear_padding_struc
>  		all_ones = false;
>  	      nonzero_last = j + 1 - i;
>  	    }
> +	  else
> +	    {
> +	      if (zero_first == wordsize)
> +		zero_first = j - i;
> +	      zero_last = j + 1 - i;
> +	    }
>  	  if (buf->buf[j] != 0 && buf->buf[j] != (unsigned char) ~0)
> -	    all_ones = false;
> +	    {
> +	      all_ones = false;
> +	      bytes_only = false;
> +	    }
>  	}
> +      size_t padding_end = i;
>        if (padding_bytes)
>  	{
>  	  if (nonzero_first == 0
> @@ -4069,7 +4081,6 @@ clear_padding_flush (clear_padding_struc
>  	      padding_bytes += wordsize;
>  	      continue;
>  	    }
> -	  size_t padding_end = i;
>  	  if (all_ones && nonzero_first == 0)
>  	    {
>  	      padding_bytes += nonzero_last;
> @@ -4077,12 +4088,27 @@ clear_padding_flush (clear_padding_struc
>  	      nonzero_first = wordsize;
>  	      nonzero_last = 0;
>  	    }
> -	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	  else if (bytes_only && nonzero_first == 0)
> +	    {
> +	      gcc_assert (zero_first && zero_first != wordsize);
> +	      padding_bytes += zero_first;
> +	      padding_end += zero_first;
> +	    }
> +	  tree atype, src;
> +	  if (padding_bytes == 1)
> +	    {
> +	      atype = char_type_node;
> +	      src = build_zero_cst (char_type_node);
> +	    }
> +	  else
> +	    {
> +	      atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	      src = build_constructor (atype, NULL);
> +	    }
>  	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>  				 build_int_cst (buf->alias_type,
>  						buf->off + padding_end
>  						- padding_bytes));
> -	  tree src = build_constructor (atype, NULL);
>  	  gimple *g = gimple_build_assign (dst, src);
>  	  gimple_set_location (g, buf->loc);
>  	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> @@ -4099,6 +4125,45 @@ clear_padding_flush (clear_padding_struc
>  	  padding_bytes = nonzero_last - nonzero_first;
>  	  continue;
>  	}
> +      if (bytes_only)
> +	{
> +	  /* If bitfields aren't involved in this word, prefer storing
> +	     individual bytes or groups of them over performing a RMW
> +	     operation on the whole word.  */
> +	  gcc_assert (i + zero_last <= end);
> +	  for (size_t j = padding_end; j < i + zero_last; j++)
> +	    {
> +	      if (buf->buf[j])
> +		{
> +		  size_t k;
> +		  for (k = j; k < i + zero_last; k++)
> +		    if (buf->buf[k] == 0)
> +		      break;
> +		  HOST_WIDE_INT off = buf->off + j;
> +		  tree atype, src;
> +		  if (k - j == 1)
> +		    {
> +		      atype = char_type_node;
> +		      src = build_zero_cst (char_type_node);
> +		    }
> +		  else
> +		    {
> +		      atype = build_array_type_nelts (char_type_node, k - j);
> +		      src = build_constructor (atype, NULL);
> +		    }
> +		  tree dst = build2_loc (buf->loc, MEM_REF, atype,
> +					 buf->base,
> +					 build_int_cst (buf->alias_type, off));
> +		  gimple *g = gimple_build_assign (dst, src);
> +		  gimple_set_location (g, buf->loc);
> +		  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> +		  j = k;
> +		}
> +	    }
> +	  if (nonzero_last == wordsize)
> +	    padding_bytes = nonzero_last - zero_last;
> +	  continue;
> +	}
>        for (size_t eltsz = 1; eltsz <= wordsize; eltsz <<= 1)
>  	{
>  	  if (nonzero_last - nonzero_first <= eltsz
> @@ -4153,12 +4218,21 @@ clear_padding_flush (clear_padding_struc
>      {
>        if (padding_bytes)
>  	{
> -	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	  tree atype, src;
> +	  if (padding_bytes == 1)
> +	    {
> +	      atype = char_type_node;
> +	      src = build_zero_cst (char_type_node);
> +	    }
> +	  else
> +	    {
> +	      atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	      src = build_constructor (atype, NULL);
> +	    }
>  	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>  				 build_int_cst (buf->alias_type,
>  						buf->off + end
>  						- padding_bytes));
> -	  tree src = build_constructor (atype, NULL);
>  	  gimple *g = gimple_build_assign (dst, src);
>  	  gimple_set_location (g, buf->loc);
>  	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-11-24  9:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-24  8:15 [PATCH] middle-end: Prefer no RMW in __builtin_clear_padding implementation where possible Jakub Jelinek
2020-11-24  9:32 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).