public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] expand: Convert cst - x into cst xor x.
@ 2022-09-06  9:42 Robin Dapp
  2022-09-06 12:31 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Robin Dapp @ 2022-09-06  9:42 UTC (permalink / raw)
  To: GCC Patches, Richard Biener, pinskia

Hi,

posting this separately from PR91213 now.  I wrote an s390 test and most
likely it could also be done for x86 which will give it broader coverage.

Depending on the backend it might be better to convert
  cst - x
into
  cst xor x
if cst + 1 is a power of two and 0 <= x <= cst.  This patch compares
both sequences and emits the less expensive one.

Does this look like a viable approach? Bootstrapped and regtested on
s390[x], waited with x86 tests until a first round of comments.

Regards
 Robin

gcc/ChangeLog:

	PR middle-end/91213
	* expr.cc (expand_expr_real_2): Call new function.
	(maybe_optimize_cst_sub): New function.
	* expr.h (maybe_optimize_cst_sub): Define.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/cst-minus-var.c: New test.

---
 gcc/expr.cc                                   | 79 +++++++++++++++++++
 gcc/expr.h                                    |  2 +
 gcc/testsuite/gcc.target/s390/cst-minus-var.c | 55 +++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/cst-minus-var.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 80bb1b8a4c5b..80f25720d7b6 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9397,6 +9397,21 @@ expand_expr_real_2 (sepops ops, rtx target,
machine_mode tmode,
 	  return simplify_gen_binary (MINUS, mode, op0, op1);
 	}

+      /* Convert const - A to const xor A if integer_pow2p (const + 1)
+	 and 0 <= A <= const.  */
+      if (code == MINUS_EXPR
+	  && SCALAR_INT_MODE_P (mode)
+	  && TREE_CODE (treeop0) == INTEGER_CST
+	  && TREE_CODE (TREE_TYPE (treeop1)) == INTEGER_TYPE
+	  && wi::exact_log2 (wi::to_widest (treeop0) + 1) != -1)
+	{
+	  rtx res = maybe_optimize_cst_sub (code, treeop0, treeop1,
+					    mode, unsignedp, type,
+					    target, subtarget);
+	  if (res)
+	    return res;
+	}
+
       /* No sense saving up arithmetic to be done
 	 if it's all in the wrong mode to form part of an address.
 	 And force_operand won't know whether to sign-extend or
@@ -12692,6 +12707,70 @@ maybe_optimize_mod_cmp (enum tree_code code,
tree *arg0, tree *arg1)
   return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
 }

+/* Convert const - A to const xor A if integer_pow2p (const + 1)
+   and 0 <= A <= const.  */
+
+rtx
+maybe_optimize_cst_sub (enum tree_code code, tree treeop0, tree treeop1,
+			machine_mode mode, int unsignedp, tree type,
+			rtx target, rtx subtarget)
+{
+  gcc_checking_assert (code == MINUS_EXPR);
+  gcc_checking_assert (SCALAR_INT_MODE_P (mode));
+  gcc_checking_assert (TREE_CODE (treeop0) == INTEGER_CST);
+  gcc_checking_assert (TREE_CODE (TREE_TYPE (treeop1)) == INTEGER_TYPE);
+  gcc_checking_assert (wi::exact_log2 (wi::to_widest (treeop0) + 1) != -1);
+
+  if (!optimize)
+    return NULL_RTX;
+
+  optab this_optab;
+  rtx op0, op1;
+
+  if (wi::leu_p (tree_nonzero_bits (treeop1), tree_nonzero_bits (treeop0)))
+    {
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
+		       EXPAND_NORMAL);
+      bool speed_p = optimize_insn_for_speed_p ();
+      do_pending_stack_adjust ();
+      start_sequence ();
+      this_optab = optab_for_tree_code (MINUS_EXPR, type,
+					optab_default);
+      rtx subi = expand_binop (mode, this_optab, op0, op1, target,
+			       unsignedp, OPTAB_LIB_WIDEN);
+
+      rtx_insn *sub_insns = get_insns ();
+      end_sequence ();
+      start_sequence ();
+      this_optab = optab_for_tree_code (BIT_XOR_EXPR, type,
+					optab_default);
+      rtx xori = expand_binop (mode, this_optab, op0, op1, target,
+			       unsignedp, OPTAB_LIB_WIDEN);
+      rtx_insn *xor_insns = get_insns ();
+      end_sequence ();
+      unsigned sub_cost = seq_cost (sub_insns, speed_p);
+      unsigned xor_cost = seq_cost (xor_insns, speed_p);
+      /* If costs are the same then use as tie breaker the other other
+	 factor.  */
+      if (sub_cost == xor_cost)
+	{
+	  sub_cost = seq_cost (sub_insns, !speed_p);
+	  xor_cost = seq_cost (xor_insns, !speed_p);
+	}
+
+      if (sub_cost <= xor_cost)
+	{
+	  emit_insn (sub_insns);
+	  return subi;
+	}
+
+      emit_insn (xor_insns);
+      return xori;
+    }
+
+  return NULL_RTX;
+}
+
 /* Optimize x - y < 0 into x < 0 if x - y has undefined overflow.  */

 void
diff --git a/gcc/expr.h b/gcc/expr.h
index 08b59b8d869a..43ea11042d26 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -326,6 +326,8 @@ extern tree string_constant (tree, tree *, tree *,
tree *);
 extern tree byte_representation (tree, tree *, tree *, tree *);

 extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *,
tree *);
+extern rtx maybe_optimize_cst_sub (enum tree_code, tree, tree,
+				   machine_mode, int, tree , rtx, rtx);
 extern void maybe_optimize_sub_cmp_0 (enum tree_code, tree *, tree *);

 /* Two different ways of generating switch statements.  */
diff --git a/gcc/testsuite/gcc.target/s390/cst-minus-var.c
b/gcc/testsuite/gcc.target/s390/cst-minus-var.c
new file mode 100644
index 000000000000..c713624a9784
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/cst-minus-var.c
@@ -0,0 +1,55 @@
+/* Check that we can convert const - x to const xor x if
+   const + 1 is a power of two and 0 <= x <= const.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mzarch" } */
+/* { dg-final { scan-assembler-times 8 "xr\t" { target { ! 390_z10_hw }
} } } */
+/* { dg-final { scan-assembler-times 8 "xilf\t" { target { s390_z10_hw
} } } } */
+
+unsigned long long foo (unsigned long long a)
+{
+    if (a > 1) __builtin_unreachable();
+    return 1 - a;
+}
+
+unsigned long long bar (unsigned long long a)
+{
+    if (a > 65535) __builtin_unreachable();
+    return 65535 - a;
+}
+
+unsigned long long baz (unsigned long long a)
+{
+    if (a > 4294967295) __builtin_unreachable();
+    return 4294967295 - a;
+}
+
+int fooi (int a)
+{
+    if (a > 127 || a < 0) __builtin_unreachable();
+    return 127 - a;
+}
+
+int bari (int a)
+{
+    if (a > 65535 || a < 0) __builtin_unreachable();
+    return 65535 - a;
+}
+
+long bazl (long a)
+{
+    if (a > 4294967295 || a < 0) __builtin_unreachable();
+    return 4294967295 - a;
+}
+
+short foos (short a)
+{
+    if (a > 31 || a < 0) __builtin_unreachable();
+    return 31 - a;
+}
+
+short bars (int a)
+{
+    if (a > 65535 || a < 0) __builtin_unreachable();
+    return 65535 - a;
+}
-- 
2.31.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-21  9:14 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-06  9:42 [PATCH] expand: Convert cst - x into cst xor x Robin Dapp
2022-09-06 12:31 ` Richard Biener
2022-09-06 14:01   ` Robin Dapp
2022-09-07 12:08     ` Richard Biener
2022-09-07 12:20       ` Robin Dapp
2022-09-07 12:45         ` Richard Biener
2022-09-07 16:30           ` Jeff Law
2022-10-21  9:13           ` Robin Dapp

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).