public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: xndcn <xndchn@gmail.com>
To: gcc-patches@gcc.gnu.org, jakub@redhat.com
Subject: Ping: [PATCH] enable ATOMIC_COMPARE_EXCHANGE opt for floating type or types contains padding
Date: Wed, 3 Jan 2024 23:42:58 +0800	[thread overview]
Message-ID: <CAJ=gGT2zjNN6Pf88rkKw5c0P0k4McCeYpkKO9-VCE1bvB4tmAg@mail.gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 6233 bytes --]

Hi, I am new to this, and I really need your advice, thanks.

I noticed PR71716 and I want to enable the ATOMIC_COMPARE_EXCHANGE
internal-fn optimization
for floating types or types containing padding (e.g., long double).
Please correct me if I happen to
make any mistakes, thanks!

Firstly, about the concerns of sNaN float/double values: it seems to
work well and should already be
covered by testsuite/gcc.dg/atomic/c11-atomic-exec-5.c

Secondly, since ATOMIC_COMPARE_EXCHANGE is only enabled when the expected
var is addressable
only because of the call, the padding bits cannot be modified by any other
stmts. So we can save all
bits after the ATOMIC_COMPARE_EXCHANGE call and extract the padding bits.
After the first iteration, the
extracted padding bits can be mixed back into the expected var.

Bootstrapped/regtested on x86_64-linux.

I did some benchmarks, and there is a significant performance improvement
for float/double types,

while there is no regression for the long double type.

Thanks,

xndcn


gcc/ChangeLog:

	* gimple-fold.cc (optimize_atomic_compare_exchange_p): Enable
	for SCALAR_FLOAT_TYPE_P type of expected var, or if TYPE_PRECISION
	is different from mode's bitsize.
	(fold_builtin_atomic_compare_exchange): If TYPE_PRECISION is
	different from mode's bitsize, try to keep track of all the bits and
	mix them with VIEW_CONVERT_EXPR<itype>(expected).

Signed-off-by: xndcn <xndchn@gmail.com>
---
 gcc/gimple-fold.cc | 77 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index cb4b57250..321ff4f41 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5306,12 +5306,7 @@ optimize_atomic_compare_exchange_p (gimple *stmt)
       || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
       || TREE_THIS_VOLATILE (etype)
       || VECTOR_TYPE_P (etype)
-      || TREE_CODE (etype) == COMPLEX_TYPE
-      /* Don't optimize floating point expected vars, VIEW_CONVERT_EXPRs
-	 might not preserve all the bits.  See PR71716.  */
-      || SCALAR_FLOAT_TYPE_P (etype)
-      || maybe_ne (TYPE_PRECISION (etype),
-		   GET_MODE_BITSIZE (TYPE_MODE (etype))))
+      || TREE_CODE (etype) == COMPLEX_TYPE)
     return false;

   tree weak = gimple_call_arg (stmt, 3);
@@ -5350,8 +5345,10 @@ fold_builtin_atomic_compare_exchange
(gimple_stmt_iterator *gsi)
   tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
   tree ctype = build_complex_type (itype);
   tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+  tree etype = TREE_TYPE (expected);
   bool throws = false;
   edge e = NULL;
+  tree allbits = NULL_TREE;
   gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
 				   expected);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -5362,6 +5359,67 @@ fold_builtin_atomic_compare_exchange
(gimple_stmt_iterator *gsi)
 			       build1 (VIEW_CONVERT_EXPR, itype,
 				       gimple_assign_lhs (g)));
       gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+      // VIEW_CONVERT_EXPRs might not preserve all the bits.  See PR71716.
+      // so we have to keep track all bits here.
+      if (maybe_ne (TYPE_PRECISION (etype),
+		    GET_MODE_BITSIZE (TYPE_MODE (etype))))
+	{
+	  gimple_stmt_iterator cgsi
+	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+	  allbits = create_tmp_var (itype);
+	  // allbits is initialized to 0, which can be ignored first time
+	  gimple *init_stmt
+	    = gimple_build_assign (allbits, build_int_cst (itype, 0));
+	  gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+	  tree maskbits = create_tmp_var (itype);
+	  // maskbits is initialized to full 1 (0xFFF...)
+	  init_stmt = gimple_build_assign (maskbits, build1 (BIT_NOT_EXPR,
+							     itype, allbits));
+	  gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+
+	  // g = g & maskbits
+	  g = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_AND_EXPR, itype,
+					   gimple_assign_lhs (g), maskbits));
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+	  gimple *def_mask = gimple_build_assign (
+	    make_ssa_name (itype),
+	    build2 (LSHIFT_EXPR, itype, build_int_cst (itype, 1),
+		    build_int_cst (itype, TYPE_PRECISION (etype))));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  def_mask = gimple_build_assign (make_ssa_name (itype),
+					  build2 (MINUS_EXPR, itype,
+						  gimple_assign_lhs (def_mask),
+						  build_int_cst (itype, 1)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  // maskbits = (1 << TYPE_PRECISION (etype)) - 1
+	  def_mask = gimple_build_assign (maskbits, SSA_NAME,
+					  gimple_assign_lhs (def_mask));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+	  // paddingbits = (~maskbits) & allbits
+	  def_mask
+	    = gimple_build_assign (make_ssa_name (itype),
+				   build1 (BIT_NOT_EXPR, itype,
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  def_mask
+	    = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_AND_EXPR, itype, allbits,
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+	  // g = g | paddingbits, i.e.,
+	  // g = (VIEW_CONVERT_EXPR<itype>(expected) & maskbits)
+	  //       | (allbits &(~maskbits))
+	  g = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_IOR_EXPR, itype,
+					   gimple_assign_lhs (g),
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+	}
     }
   int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
 	     + int_size_in_bytes (itype);
@@ -5410,6 +5468,13 @@ fold_builtin_atomic_compare_exchange
(gimple_stmt_iterator *gsi)
     gsi_insert_after (gsi, g, GSI_NEW_STMT);
   if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
     {
+      // save all bits here
+      if (maybe_ne (TYPE_PRECISION (etype),
+		    GET_MODE_BITSIZE (TYPE_MODE (etype))))
+	{
+	  g = gimple_build_assign (allbits, SSA_NAME, gimple_assign_lhs (g));
+	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+	}
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
 			       VIEW_CONVERT_EXPR,
 			       build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
-- 
2.25.1

             reply	other threads:[~2024-01-03 15:43 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-03 15:42 xndcn [this message]
2024-01-03 15:52 ` Jakub Jelinek
2024-01-04 16:19   ` xndcn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJ=gGT2zjNN6Pf88rkKw5c0P0k4McCeYpkKO9-VCE1bvB4tmAg@mail.gmail.com' \
    --to=xndchn@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).