public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Robin Dapp <rdapp@linux.ibm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>,
	Richard Biener <richard.guenther@gmail.com>,
	pinskia@gmail.com
Subject: [PATCH] expand: Convert cst - x into cst xor x.
Date: Tue, 6 Sep 2022 11:42:34 +0200	[thread overview]
Message-ID: <a034561f-c4b8-3af0-3631-7720c143c7cb@linux.ibm.com> (raw)

Hi,

posting this separately from PR91213 now.  I wrote an s390 test and most
likely it could also be done for x86 which will give it broader coverage.

Depending on the backend it might be better to convert
  cst - x
into
  cst xor x
if cst + 1 is a power of two and 0 <= x <= cst.  This patch compares
both sequences and emits the less expensive one.

Does this look like a viable approach? Bootstrapped and regtested on
s390[x], waited with x86 tests until a first round of comments.

Regards
 Robin

gcc/ChangeLog:

	PR middle-end/91213
	* expr.cc (expand_expr_real_2): Call new function.
	(maybe_optimize_cst_sub): New function.
	* expr.h (maybe_optimize_cst_sub): Define.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/cst-minus-var.c: New test.

---
 gcc/expr.cc                                   | 79 +++++++++++++++++++
 gcc/expr.h                                    |  2 +
 gcc/testsuite/gcc.target/s390/cst-minus-var.c | 55 +++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/cst-minus-var.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 80bb1b8a4c5b..80f25720d7b6 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9397,6 +9397,21 @@ expand_expr_real_2 (sepops ops, rtx target,
machine_mode tmode,
 	  return simplify_gen_binary (MINUS, mode, op0, op1);
 	}

+      /* Convert const - A to const xor A if integer_pow2p (const + 1)
+	 and 0 <= A <= const.  */
+      if (code == MINUS_EXPR
+	  && SCALAR_INT_MODE_P (mode)
+	  && TREE_CODE (treeop0) == INTEGER_CST
+	  && TREE_CODE (TREE_TYPE (treeop1)) == INTEGER_TYPE
+	  && wi::exact_log2 (wi::to_widest (treeop0) + 1) != -1)
+	{
+	  rtx res = maybe_optimize_cst_sub (code, treeop0, treeop1,
+					    mode, unsignedp, type,
+					    target, subtarget);
+	  if (res)
+	    return res;
+	}
+
       /* No sense saving up arithmetic to be done
 	 if it's all in the wrong mode to form part of an address.
 	 And force_operand won't know whether to sign-extend or
@@ -12692,6 +12707,70 @@ maybe_optimize_mod_cmp (enum tree_code code,
tree *arg0, tree *arg1)
   return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
 }

+/* Convert const - A to const xor A if integer_pow2p (const + 1)
+   and 0 <= A <= const.  */
+
+rtx
+maybe_optimize_cst_sub (enum tree_code code, tree treeop0, tree treeop1,
+			machine_mode mode, int unsignedp, tree type,
+			rtx target, rtx subtarget)
+{
+  gcc_checking_assert (code == MINUS_EXPR);
+  gcc_checking_assert (SCALAR_INT_MODE_P (mode));
+  gcc_checking_assert (TREE_CODE (treeop0) == INTEGER_CST);
+  gcc_checking_assert (TREE_CODE (TREE_TYPE (treeop1)) == INTEGER_TYPE);
+  gcc_checking_assert (wi::exact_log2 (wi::to_widest (treeop0) + 1) != -1);
+
+  if (!optimize)
+    return NULL_RTX;
+
+  optab this_optab;
+  rtx op0, op1;
+
+  if (wi::leu_p (tree_nonzero_bits (treeop1), tree_nonzero_bits (treeop0)))
+    {
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
+		       EXPAND_NORMAL);
+      bool speed_p = optimize_insn_for_speed_p ();
+      do_pending_stack_adjust ();
+      start_sequence ();
+      this_optab = optab_for_tree_code (MINUS_EXPR, type,
+					optab_default);
+      rtx subi = expand_binop (mode, this_optab, op0, op1, target,
+			       unsignedp, OPTAB_LIB_WIDEN);
+
+      rtx_insn *sub_insns = get_insns ();
+      end_sequence ();
+      start_sequence ();
+      this_optab = optab_for_tree_code (BIT_XOR_EXPR, type,
+					optab_default);
+      rtx xori = expand_binop (mode, this_optab, op0, op1, target,
+			       unsignedp, OPTAB_LIB_WIDEN);
+      rtx_insn *xor_insns = get_insns ();
+      end_sequence ();
+      unsigned sub_cost = seq_cost (sub_insns, speed_p);
+      unsigned xor_cost = seq_cost (xor_insns, speed_p);
+      /* If costs are the same then use as tie breaker the other other
+	 factor.  */
+      if (sub_cost == xor_cost)
+	{
+	  sub_cost = seq_cost (sub_insns, !speed_p);
+	  xor_cost = seq_cost (xor_insns, !speed_p);
+	}
+
+      if (sub_cost <= xor_cost)
+	{
+	  emit_insn (sub_insns);
+	  return subi;
+	}
+
+      emit_insn (xor_insns);
+      return xori;
+    }
+
+  return NULL_RTX;
+}
+
 /* Optimize x - y < 0 into x < 0 if x - y has undefined overflow.  */

 void
diff --git a/gcc/expr.h b/gcc/expr.h
index 08b59b8d869a..43ea11042d26 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -326,6 +326,8 @@ extern tree string_constant (tree, tree *, tree *,
tree *);
 extern tree byte_representation (tree, tree *, tree *, tree *);

 extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *,
tree *);
+extern rtx maybe_optimize_cst_sub (enum tree_code, tree, tree,
+				   machine_mode, int, tree , rtx, rtx);
 extern void maybe_optimize_sub_cmp_0 (enum tree_code, tree *, tree *);

 /* Two different ways of generating switch statements.  */
diff --git a/gcc/testsuite/gcc.target/s390/cst-minus-var.c
b/gcc/testsuite/gcc.target/s390/cst-minus-var.c
new file mode 100644
index 000000000000..c713624a9784
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/cst-minus-var.c
@@ -0,0 +1,55 @@
+/* Check that we can convert const - x to const xor x if
+   const + 1 is a power of two and 0 <= x <= const.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mzarch" } */
+/* { dg-final { scan-assembler-times 8 "xr\t" { target { ! 390_z10_hw }
} } } */
+/* { dg-final { scan-assembler-times 8 "xilf\t" { target { s390_z10_hw
} } } } */
+
+unsigned long long foo (unsigned long long a)
+{
+    if (a > 1) __builtin_unreachable();
+    return 1 - a;
+}
+
+unsigned long long bar (unsigned long long a)
+{
+    if (a > 65535) __builtin_unreachable();
+    return 65535 - a;
+}
+
+unsigned long long baz (unsigned long long a)
+{
+    if (a > 4294967295) __builtin_unreachable();
+    return 4294967295 - a;
+}
+
+int fooi (int a)
+{
+    if (a > 127 || a < 0) __builtin_unreachable();
+    return 127 - a;
+}
+
+int bari (int a)
+{
+    if (a > 65535 || a < 0) __builtin_unreachable();
+    return 65535 - a;
+}
+
+long bazl (long a)
+{
+    if (a > 4294967295 || a < 0) __builtin_unreachable();
+    return 4294967295 - a;
+}
+
+short foos (short a)
+{
+    if (a > 31 || a < 0) __builtin_unreachable();
+    return 31 - a;
+}
+
+short bars (int a)
+{
+    if (a > 65535 || a < 0) __builtin_unreachable();
+    return 65535 - a;
+}
-- 
2.31.1

             reply	other threads:[~2022-09-06  9:42 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-06  9:42 Robin Dapp [this message]
2022-09-06 12:31 ` Richard Biener
2022-09-06 14:01   ` Robin Dapp
2022-09-07 12:08     ` Richard Biener
2022-09-07 12:20       ` Robin Dapp
2022-09-07 12:45         ` Richard Biener
2022-09-07 16:30           ` Jeff Law
2022-10-21  9:13           ` Robin Dapp

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a034561f-c4b8-3af0-3631-7720c143c7cb@linux.ibm.com \
    --to=rdapp@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=pinskia@gmail.com \
    --cc=richard.guenther@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).