public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
@ 2022-10-31 11:53 Tamar Christina
  2022-10-31 11:53 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Tamar Christina @ 2022-10-31 11:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther, jeffreyalaw

[-- Attachment #1: Type: text/plain, Size: 15179 bytes --]

Hi All,

This adds a new test-and-branch optab that can be used to do a conditional test
of a bit and branch.   This is similar to the cbranch optab but instead can
test any arbitrary bit inside the register.

This patch recognizes boolean comparisons and single bit mask tests.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch$a4): New.
	* doc/md.texi (tbranch@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch -- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6972,6 +6972,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cff37ccb0dfc3dd79b97d0abfd872f340855dc96..5b368f77e91d3fce29870f1a5b54a0301e6b7794 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4621,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4630,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4645,71 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+bool
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val)
+    return false;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return false;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return true;
+    }
+
+  if (TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (val);
+  if (!is_gimple_assign (def)
+      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
+    return false;
+
+  tree cst = gimple_assign_rhs2 (def);
+
+  if (!tree_fits_uhwi_p (cst))
+    return false;
+
+  tree op0 = gimple_assign_rhs1 (def);
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def = SSA_NAME_DEF_STMT (op0);
+      if (gimple_assign_cast_p (def))
+	op0 = gimple_assign_rhs1 (def);
+    }
+
+  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
+			    TYPE_PRECISION (TREE_TYPE (op0)));
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return false;
+
+  mode = TYPE_MODE (TREE_TYPE (op0));
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return true;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,15 +4727,18 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
   rtx test;
+  enum insn_code icode;
 
   /* Swap operands and condition to ensure canonical RTL.  */
   if (swap_commutative_operands_p (x, y)
@@ -4690,10 +4759,37 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode))
+    {
+      /* If the target supports the testbit comparison directly, great.  */
+      icode = direct_optab_handler (tbranch_optab, tmode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+	  return;
+	}
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-\f
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 9947aed67fb8a3b675cb0aab9aeb059f89644106..623a596aca2f538a03602e02e6ac12f43f3303c4 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);




-- 

[-- Attachment #2: rb16485.patch --]
[-- Type: text/plain, Size: 14074 bytes --]

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6972,6 +6972,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cff37ccb0dfc3dd79b97d0abfd872f340855dc96..5b368f77e91d3fce29870f1a5b54a0301e6b7794 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4621,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4630,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4645,71 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+bool
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val)
+    return false;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return false;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return true;
+    }
+
+  if (TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (val);
+  if (!is_gimple_assign (def)
+      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
+    return false;
+
+  tree cst = gimple_assign_rhs2 (def);
+
+  if (!tree_fits_uhwi_p (cst))
+    return false;
+
+  tree op0 = gimple_assign_rhs1 (def);
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def = SSA_NAME_DEF_STMT (op0);
+      if (gimple_assign_cast_p (def))
+	op0 = gimple_assign_rhs1 (def);
+    }
+
+  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
+			    TYPE_PRECISION (TREE_TYPE (op0)));
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return false;
+
+  mode = TYPE_MODE (TREE_TYPE (op0));
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return true;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,15 +4727,18 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
   rtx test;
+  enum insn_code icode;
 
   /* Swap operands and condition to ensure canonical RTL.  */
   if (swap_commutative_operands_p (x, y)
@@ -4690,10 +4759,37 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode))
+    {
+      /* If the target supports the testbit comparison directly, great.  */
+      icode = direct_optab_handler (tbranch_optab, tmode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+	  return;
+	}
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-\f
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 9947aed67fb8a3b675cb0aab9aeb059f89644106..623a596aca2f538a03602e02e6ac12f43f3303c4 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);




^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-10-31 11:53 [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Tamar Christina
@ 2022-10-31 11:53 ` Tamar Christina
  2022-11-14 15:58   ` Tamar Christina
  2022-10-31 11:54 ` [PATCH]AArch64 Extend umov and sbfx patterns Tamar Christina
  2022-10-31 21:16 ` [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Jeff Law
  2 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-10-31 11:53 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 6867 bytes --]

Hi All,

This implements the new tbranch optab for AArch64.

Instead of emitting the instruction directly I've chosen to expand the pattern
using a zero extract and generating the existing pattern for comparisons for two
reasons:

  1. Allows for CSE of the actual comparison.
  2. It looks like the code in expand marks the label as unused and removes it
     if it doesn't see a separate reference to it.

Because of this expansion though I disable the pattern at -O0 since we have no
combine in that case so we'd end up with worse code.  I did try emitting the
pattern directly, but as mentioned in point 2 above, expand would then kill the label.

While doing this I noticed that the version that checks the signbit doesn't work.
The reason for this looks like an incorrect pattern.  The [us]bfx
instructions are defined for index + size == register size.  They architecturally
alias to different instructions and binutils handles this correctly.

In GCC however we tried to prematurely optimize this and added a separate split
pattern.  But this pattern is also missing alternatives only handling DImode.

This just removes this and relaxes the constraints on the normal bfx pattern.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch<mode>4): New.
	(*<optab><mode>): Rename to...
	(*<optab><GPI:mode><ALLI:mode>): ... this.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +975,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -5752,39 +5768,19 @@ (define_expand "<optab>"
 )
 
 
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+	(ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
 			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_<mode>" "n")
+			   "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
 			 (match_operand 3
-			   "aarch64_simd_shift_imm_<mode>" "n")))]
+			   "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
-	     1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+	     1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+  "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
   [(set_attr "type" "bfx")]
 )
 
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(zero_extract:DI (match_operand:DI 1 "register_operand")
-			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_di")
-			 (match_operand 3
-			   "aarch64_simd_shift_imm_di")))]
-  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
-	     GET_MODE_BITSIZE (DImode) - 1)
-   && (INTVAL (operands[2]) + INTVAL (operands[3]))
-       == GET_MODE_BITSIZE (SImode)"
-  [(set (match_dup 0)
-	(zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
-  {
-    operands[4] = gen_lowpart (SImode, operands[1]);
-  }
-)
-
 ;; Bitfield Insert (insv)
 (define_expand "insv<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}




-- 

[-- Attachment #2: rb16486.patch --]
[-- Type: text/plain, Size: 5264 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +975,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -5752,39 +5768,19 @@ (define_expand "<optab>"
 )
 
 
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+	(ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
 			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_<mode>" "n")
+			   "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
 			 (match_operand 3
-			   "aarch64_simd_shift_imm_<mode>" "n")))]
+			   "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
-	     1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+	     1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+  "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
   [(set_attr "type" "bfx")]
 )
 
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(zero_extract:DI (match_operand:DI 1 "register_operand")
-			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_di")
-			 (match_operand 3
-			   "aarch64_simd_shift_imm_di")))]
-  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
-	     GET_MODE_BITSIZE (DImode) - 1)
-   && (INTVAL (operands[2]) + INTVAL (operands[3]))
-       == GET_MODE_BITSIZE (SImode)"
-  [(set (match_dup 0)
-	(zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
-  {
-    operands[4] = gen_lowpart (SImode, operands[1]);
-  }
-)
-
 ;; Bitfield Insert (insv)
 (define_expand "insv<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}




^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH]AArch64 Extend umov and sbfx patterns.
  2022-10-31 11:53 [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Tamar Christina
  2022-10-31 11:53 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
@ 2022-10-31 11:54 ` Tamar Christina
  2022-10-31 12:26   ` Richard Sandiford
  2022-10-31 21:16 ` [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Jeff Law
  2 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-10-31 11:54 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 7646 bytes --]

Hi All,

Our zero and sign extend and extract patterns are currently very limited and
only work for the original register size of the instructions, i.e. they are
limited by the GPI patterns.  However, these instructions extract bits and
extend.  This means that any register size can be used as an input as long as
the extraction makes logical sense.

The majority of the attached testcases currently fail to optimize.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_get_lane<mode>): Drop reload
	penalty.
	* config/aarch64/aarch64.md
	(*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>): Renamed to...
	(*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>): ...this.
	(*zero_extend<GPI:mode>_lshr<SHORT:mode>): Renamed to...
	(*zero_extend<GPI:mode>_<optab><ALLX:mode>): ...this.
	(*extend<GPI:mode>_ashr<SHORT:mode>): Rename to...
	(*extend<GPI:mode>_<optab><ALLX:mode>): ...this.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/bitmove_1.c: New test.
	* gcc.target/aarch64/bitmove_2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8bcc9e76b1cad4a2591fb176175db72d7a190d57..23909c62638b49722568da4555b33c71fd21337e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4259,7 +4259,7 @@ (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
 ;; Extracting lane zero is split into a simple move when it is between SIMD
 ;; registers or a store.
 (define_insn_and_split "aarch64_get_lane<mode>"
-  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
+  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
 	(vec_select:<VEL>
 	  (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
 	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 85b400489cb382a01b0c469eff2b600a93805e31..3116feda4fe54e2a21dc3f990b6976d216874260 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5629,13 +5629,13 @@ (define_insn "*<optab>si3_insn2_uxtw"
 )
 
 (define_insn "*<optab><mode>3_insn"
-  [(set (match_operand:SHORT 0 "register_operand" "=r")
-	(ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r")
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+	(ASHIFT:ALLI (match_operand:ALLI 1 "register_operand" "r")
 		      (match_operand 2 "const_int_operand" "n")))]
   "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
 {
   operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
-  return "<bfshift>\t%w0, %w1, %2, %3";
+  return "<bfshift>\t%<w>0, %<w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
@@ -5710,40 +5710,40 @@ (define_insn "*extrsi5_insn_di"
   [(set_attr "type" "rotate_imm")]
 )
 
-(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
+(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(ANY_EXTEND:GPI
-	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
+	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
 		       (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
+(define_insn "*zero_extend<GPI:mode>_<optab><ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (LSHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))))]
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
+(define_insn "*extend<GPI:mode>_<optab><ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(sign_extend:GPI
-	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (ASHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))))]
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..8b0aa8af49cd070928bacc4995a321c7bfde58a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	asr	x0, x0, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x >> 16;
+}
+
+/*
+** ufoo6:
+** 	lsr	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x >> 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfx	w0, w0, 7, 9
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x >> 7;
+}
+
+/*
+** ufoo6h:
+** 	ubfx	w0, w0, 4, 4
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x >> 4;
+}
+
+/*
+** sfoo62:
+** 	asr	x0, x0, 10
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** ufoo62:
+** 	lsr	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** sfoo63:
+** 	asr	x0, x0, 10
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x >> 10;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..54b3071a3b4e2001f83337837e712c381683d23a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	sbfiz	x0, x0, 16, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x << 16;
+}
+
+/*
+** ufoo6:
+** 	lsl	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x << 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfiz	w0, w0, 7, 16
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x << 7;
+}
+
+/*
+** ufoo6h:
+** 	uxtb	w0, w0
+** 	ubfiz	w0, w0, 4, 12
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x << 4;
+}
+
+/*
+** sfoo62:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x << 10;
+}
+
+/*
+** ufoo62:
+** 	lsl	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x << 10;
+}
+
+/*
+** sfoo63:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x << 10;
+}




-- 

[-- Attachment #2: rb16488.patch --]
[-- Type: text/plain, Size: 6526 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8bcc9e76b1cad4a2591fb176175db72d7a190d57..23909c62638b49722568da4555b33c71fd21337e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4259,7 +4259,7 @@ (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
 ;; Extracting lane zero is split into a simple move when it is between SIMD
 ;; registers or a store.
 (define_insn_and_split "aarch64_get_lane<mode>"
-  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
+  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
 	(vec_select:<VEL>
 	  (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
 	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 85b400489cb382a01b0c469eff2b600a93805e31..3116feda4fe54e2a21dc3f990b6976d216874260 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5629,13 +5629,13 @@ (define_insn "*<optab>si3_insn2_uxtw"
 )
 
 (define_insn "*<optab><mode>3_insn"
-  [(set (match_operand:SHORT 0 "register_operand" "=r")
-	(ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r")
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+	(ASHIFT:ALLI (match_operand:ALLI 1 "register_operand" "r")
 		      (match_operand 2 "const_int_operand" "n")))]
   "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
 {
   operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
-  return "<bfshift>\t%w0, %w1, %2, %3";
+  return "<bfshift>\t%<w>0, %<w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
@@ -5710,40 +5710,40 @@ (define_insn "*extrsi5_insn_di"
   [(set_attr "type" "rotate_imm")]
 )
 
-(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
+(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(ANY_EXTEND:GPI
-	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
+	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
 		       (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
+(define_insn "*zero_extend<GPI:mode>_<optab><ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (LSHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))))]
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
+(define_insn "*extend<GPI:mode>_<optab><ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(sign_extend:GPI
-	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (ASHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))))]
+  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..8b0aa8af49cd070928bacc4995a321c7bfde58a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	asr	x0, x0, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x >> 16;
+}
+
+/*
+** ufoo6:
+** 	lsr	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x >> 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfx	w0, w0, 7, 9
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x >> 7;
+}
+
+/*
+** ufoo6h:
+** 	ubfx	w0, w0, 4, 4
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x >> 4;
+}
+
+/*
+** sfoo62:
+** 	asr	x0, x0, 10
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** ufoo62:
+** 	lsr	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** sfoo63:
+** 	asr	x0, x0, 10
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x >> 10;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..54b3071a3b4e2001f83337837e712c381683d23a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	sbfiz	x0, x0, 16, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x << 16;
+}
+
+/*
+** ufoo6:
+** 	lsl	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x << 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfiz	w0, w0, 7, 16
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x << 7;
+}
+
+/*
+** ufoo6h:
+** 	uxtb	w0, w0
+** 	ubfiz	w0, w0, 4, 12
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x << 4;
+}
+
+/*
+** sfoo62:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x << 10;
+}
+
+/*
+** ufoo62:
+** 	lsl	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x << 10;
+}
+
+/*
+** sfoo63:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x << 10;
+}




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH]AArch64 Extend umov and sbfx patterns.
  2022-10-31 11:54 ` [PATCH]AArch64 Extend umov and sbfx patterns Tamar Christina
@ 2022-10-31 12:26   ` Richard Sandiford
  2022-11-11 14:42     ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-10-31 12:26 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov

Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> Our zero and sign extend and extract patterns are currently very limited and
> only work for the original register size of the instructions. i.e. limited by
> GPI patterns.  However these instructions extract bits and extend.  This means
> that any register size can be used as an input as long as the extraction makes
> logical sense.
>
> The majority of the attached testcases fail currently to optimize.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd.md (aarch64_get_lane<mode>): Drop reload
> 	penalty.
> 	* config/aarch64/aarch64.md
> 	(*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>): Renamed to...
> 	(*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>): ...this.
> 	(*zero_extend<GPI:mode>_lshr<SHORT:mode>): Renamed to...
> 	(*zero_extend<GPI:mode>_<optab><ALLX:mode>): ...this.
> 	(*extend<GPI:mode>_ashr<SHORT:mode>): Rename to...
> 	(*extend<GPI:mode>_<optab><ALLX:mode>): ...this.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/bitmove_1.c: New test.
> 	* gcc.target/aarch64/bitmove_2.c: New test.

Looks like a nice change, but some comments below.

>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 8bcc9e76b1cad4a2591fb176175db72d7a190d57..23909c62638b49722568da4555b33c71fd21337e 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4259,7 +4259,7 @@ (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>  ;; Extracting lane zero is split into a simple move when it is between SIMD
>  ;; registers or a store.
>  (define_insn_and_split "aarch64_get_lane<mode>"
> -  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
> +  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
>  	(vec_select:<VEL>
>  	  (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
>  	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]

Which testcase does this help with?  It didn't look like the new tests
do any vector stuff.

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 85b400489cb382a01b0c469eff2b600a93805e31..3116feda4fe54e2a21dc3f990b6976d216874260 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5629,13 +5629,13 @@ (define_insn "*<optab>si3_insn2_uxtw"
>  )
>  
>  (define_insn "*<optab><mode>3_insn"
> -  [(set (match_operand:SHORT 0 "register_operand" "=r")
> -	(ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r")
> +  [(set (match_operand:ALLI 0 "register_operand" "=r")
> +	(ASHIFT:ALLI (match_operand:ALLI 1 "register_operand" "r")
>  		      (match_operand 2 "const_int_operand" "n")))]
>    "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
>  {
>    operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
> -  return "<bfshift>\t%w0, %w1, %2, %3";
> +  return "<bfshift>\t%<w>0, %<w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )

Similar question here I guess.  There's a separate pattern for SI and DI
shifts, so I wouldn't have expected this to be necessary.

> @@ -5710,40 +5710,40 @@ (define_insn "*extrsi5_insn_di"
>    [(set_attr "type" "rotate_imm")]
>  )
>  
> -(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
> +(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(ANY_EXTEND:GPI
> -	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
> +	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
>  		       (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"

It'd be better to avoid even defining si<-si or si<-di "extensions"
(even though nothing should try to match them), so how about adding:

  <GPI:sizen> > <ALLX:sizen> && 

or similar to the beginning of the condition?  The conditions for
the invalid combos will then be provably false at compile time and
the patterns will be compiled out.

Same comment for the others.

>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )
>  
> -(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
> +(define_insn "*zero_extend<GPI:mode>_<optab><ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(zero_extend:GPI
> -	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -			 (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +	 (LSHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +			     (match_operand 2 "const_int_operand" "n"))))]
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )

I think it'd better to stick to the hard-coded lshiftrt, since nothing
in the asm template is parameterised by the operation.  Using single-item
iterators is only really useful for "@" patterns.

Likewise for the ashiftrt pattern.

Thanks,
Richard

> -(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
> +(define_insn "*extend<GPI:mode>_<optab><ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(sign_extend:GPI
> -	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -			 (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +	 (ASHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +			     (match_operand 2 "const_int_operand" "n"))))]
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..8b0aa8af49cd070928bacc4995a321c7bfde58a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +** 	asr	x0, x0, 16
> +** 	ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x >> 16;
> +}
> +
> +/*
> +** ufoo6:
> +** 	lsr	w0, w0, 30
> +** 	ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x >> 30;
> +}
> +
> +/*
> +** ufoo6s:
> +** 	ubfx	w0, w0, 7, 9
> +** 	ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x >> 7;
> +}
> +
> +/*
> +** ufoo6h:
> +** 	ubfx	w0, w0, 4, 4
> +** 	ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x >> 4;
> +}
> +
> +/*
> +** sfoo62:
> +** 	asr	x0, x0, 10
> +** 	ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** ufoo62:
> +** 	lsr	w0, w0, 10
> +** 	ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** sfoo63:
> +** 	asr	x0, x0, 10
> +** 	ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x >> 10;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..54b3071a3b4e2001f83337837e712c381683d23a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +** 	sbfiz	x0, x0, 16, 16
> +** 	ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x << 16;
> +}
> +
> +/*
> +** ufoo6:
> +** 	lsl	w0, w0, 30
> +** 	ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x << 30;
> +}
> +
> +/*
> +** ufoo6s:
> +** 	ubfiz	w0, w0, 7, 16
> +** 	ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x << 7;
> +}
> +
> +/*
> +** ufoo6h:
> +** 	uxtb	w0, w0
> +** 	ubfiz	w0, w0, 4, 12
> +** 	ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x << 4;
> +}
> +
> +/*
> +** sfoo62:
> +** 	sbfiz	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** ufoo62:
> +** 	lsl	w0, w0, 10
> +** 	ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** sfoo63:
> +** 	sbfiz	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x << 10;
> +}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-10-31 11:53 [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Tamar Christina
  2022-10-31 11:53 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
  2022-10-31 11:54 ` [PATCH]AArch64 Extend umov and sbfx patterns Tamar Christina
@ 2022-10-31 21:16 ` Jeff Law
  2022-11-01 15:53   ` Tamar Christina
  2 siblings, 1 reply; 33+ messages in thread
From: Jeff Law @ 2022-10-31 21:16 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches; +Cc: nd, rguenther


On 10/31/22 05:53, Tamar Christina wrote:
> Hi All,
>
> This adds a new test-and-branch optab that can be used to do a conditional test
> of a bit and branch.   This is similar to the cbranch optab but instead can
> test any arbitrary bit inside the register.
>
> This patch recognizes boolean comparisons and single bit mask tests.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch$a4): New.
> 	* doc/md.texi (tbranch@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns):
> 	* tree.h (tree_zero_one_valued_p): New.
>
> --- inline copy of patch --
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6972,6 +6972,13 @@ case, you can and should make operand 1's predicate reject some operators
>   in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>   from the machine description.
>   
> +@cindex @code{tbranch@var{mode}4} instruction pattern
> +@item @samp{tbranch@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to.

Should we refine/document the set of comparison operators allowed?    Is 
operand 1 an arbitrary RTL expression or more limited?  I'm guessing its 
relatively arbitrary given how you've massaged the existing 
branch-on-bit patterns from the aarch backend.


> +
> +  if (TREE_CODE (val) != SSA_NAME)
> +    return false;
> +
> +  gimple *def = SSA_NAME_DEF_STMT (val);
> +  if (!is_gimple_assign (def)
> +      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
> +    return false;
> +
> +  tree cst = gimple_assign_rhs2 (def);
> +
> +  if (!tree_fits_uhwi_p (cst))
> +    return false;
> +
> +  tree op0 = gimple_assign_rhs1 (def);
> +  if (TREE_CODE (op0) == SSA_NAME)
> +    {
> +      def = SSA_NAME_DEF_STMT (op0);
> +      if (gimple_assign_cast_p (def))
> +	op0 = gimple_assign_rhs1 (def);
> +    }
> +
> +  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
> +			    TYPE_PRECISION (TREE_TYPE (op0)));
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return false;

Do we have enough information lying around from Ranger to avoid the need 
to walk the def-use chain to discover that we're masking off all but one 
bit?



>   
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>   extern tree signed_type_for (tree);
>   extern tree unsigned_type_for (tree);
>   extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);

I don't see a definition of this anywhere.


jeff



^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-10-31 21:16 ` [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Jeff Law
@ 2022-11-01 15:53   ` Tamar Christina
  2022-11-01 17:00     ` Jeff Law
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-01 15:53 UTC (permalink / raw)
  To: Jeff Law, gcc-patches; +Cc: nd, rguenther

> -----Original Message-----
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Monday, October 31, 2022 9:16 PM
> To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; rguenther@suse.de
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> 
> On 10/31/22 05:53, Tamar Christina wrote:
> > Hi All,
> >
> > This adds a new test-and-branch optab that can be used to do a conditional
> test
> > of a bit and branch.   This is similar to the cbranch optab but instead can
> > test any arbitrary bit inside the register.
> >
> > This patch recognizes boolean comparisons and single bit mask tests.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* dojump.cc (do_jump): Pass along value.
> > 	(do_jump_by_parts_greater_rtx): Likewise.
> > 	(do_jump_by_parts_zero_rtx): Likewise.
> > 	(do_jump_by_parts_equality_rtx): Likewise.
> > 	(do_compare_rtx_and_jump): Likewise.
> > 	(do_compare_and_jump): Likewise.
> > 	* dojump.h (do_compare_rtx_and_jump): New.
> > 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab
> to check.
> > 	(validate_test_and_branch): New.
> > 	(emit_cmp_and_jump_insns): Optionally take a value, and when
> value is
> > 	supplied then check if it's suitable for tbranch.
> > 	* optabs.def (tbranch$a4): New.
> > 	* doc/md.texi (tbranch@var{mode}4): Document it.
> > 	* optabs.h (emit_cmp_and_jump_insns):
> > 	* tree.h (tree_zero_one_valued_p): New.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a1
> 6b
> > 5a8a1819b66a 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6972,6 +6972,13 @@ case, you can and should make operand 1's
> predicate reject some operators
> >   in the @samp{cstore@var{mode}4} pattern, or remove the pattern
> altogether
> >   from the machine description.
> >
> > +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > +@samp{tbranch@var{mode}4} Conditional branch instruction combined
> > +with a bit test-and-compare instruction. Operand 0 is a comparison
> > +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > +the bit position of Operand 1 to test.
> > +Operand 3 is the @code{code_label} to jump to.
> 
> Should we refine/document the set of comparison operators allowed?    Is
> operand 1 an arbitrary RTL expression or more limited?  I'm guessing its
> relatively arbitrary given how you've massaged the existing branch-on-bit
> patterns from the aarch backend.

It can be any expression in theory. However in practical terms we usually force
the values to registers before calling the expansion.  My assumption is that this
is for CSE purposes but that's only a guess.

> 
> 
> > +
> > +  if (TREE_CODE (val) != SSA_NAME)
> > +    return false;
> > +
> > +  gimple *def = SSA_NAME_DEF_STMT (val);  if (!is_gimple_assign (def)
> > +      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
> > +    return false;
> > +
> > +  tree cst = gimple_assign_rhs2 (def);
> > +
> > +  if (!tree_fits_uhwi_p (cst))
> > +    return false;
> > +
> > +  tree op0 = gimple_assign_rhs1 (def);
> > +  if (TREE_CODE (op0) == SSA_NAME)
> > +    {
> > +      def = SSA_NAME_DEF_STMT (op0);
> > +      if (gimple_assign_cast_p (def))
> > +	op0 = gimple_assign_rhs1 (def);
> > +    }
> > +
> > +  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
> > +			    TYPE_PRECISION (TREE_TYPE (op0)));
> > +  int bitpos;
> > +
> > +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> > +    return false;
> 
> Do we have enough information lying around from Ranger to avoid the need
> to walk the def-use chain to discover that we're masking off all but one bit?
> 

That's an interesting thought.  I'll try to see if I can figure out how to query
Ranger here.  It would be nice to do so here.

Cheers,
Tamar

> 
> 
> >
> >
> > diff --git a/gcc/tree.h b/gcc/tree.h
> > index
> >
> 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c
> 878
> > c10f9fe9e1f1 100644
> > --- a/gcc/tree.h
> > +++ b/gcc/tree.h
> > @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int,
> tree);
> >   extern tree signed_type_for (tree);
> >   extern tree unsigned_type_for (tree);
> >   extern bool is_truth_type_for (tree, tree);
> > +extern bool tree_zero_one_valued_p (tree);
> 
> I don't see a definition of this anywhere.
> 
> 
> jeff
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-01 15:53   ` Tamar Christina
@ 2022-11-01 17:00     ` Jeff Law
  2022-11-02  9:55       ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff Law @ 2022-11-01 17:00 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches; +Cc: nd, rguenther


On 11/1/22 09:53, Tamar Christina wrote:
>>
>>>    from the machine description.
>>>
>>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
>>> +@samp{tbranch@var{mode}4} Conditional branch instruction combined
>>> +with a bit test-and-compare instruction. Operand 0 is a comparison
>>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
>>> +the bit position of Operand 1 to test.
>>> +Operand 3 is the @code{code_label} to jump to.
>> Should we refine/document the set of comparison operators allowed?    Is
>> operand 1 an arbitrary RTL expression or more limited?  I'm guessing its
>> relatively arbitrary given how you've massaged the existing branch-on-bit
>> patterns from the aarch backend.
> It can be any expression in theory. However in practical terms we usually force
> the values to registers before calling the expansion.  My assumption is that this
> is for CSE purposes but that's only a guess.

Understood.  And generally yes, forcing expressions into regs is good 
for CSE.


>
>> Do we have enough information lying around from Ranger to avoid the need
>> to walk the def-use chain to discover that we're masking off all but one bit?
>>
> That's an interesting thought.  I'll try to see if I can figure out how to query
> Ranger here.  It would be nice to do so here.

Reach out to Aldy, I suspect he can probably give you the necessary 
pseudocode pretty quickly.


Jeff



^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-01 17:00     ` Jeff Law
@ 2022-11-02  9:55       ` Tamar Christina
  2022-11-02 11:08         ` Aldy Hernandez
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-02  9:55 UTC (permalink / raw)
  To: Jeff Law, gcc-patches; +Cc: nd, rguenther, aldyh

Hi Aldy,

I'm trying to use Ranger to determine if a range of an expression is a single bit.

If possible in case of a mask then also the position of the bit that's being checked by the mask (or the mask itself).

Do you have any pointers/existing code I can look at to do this?

Kind regards,
Tamar

> -----Original Message-----
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Tuesday, November 1, 2022 5:00 PM
> To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; rguenther@suse.de
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> 
> On 11/1/22 09:53, Tamar Christina wrote:
> >>
> >>>    from the machine description.
> >>>
> >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> combined
> >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> >>> +the bit position of Operand 1 to test.
> >>> +Operand 3 is the @code{code_label} to jump to.
> >> Should we refine/document the set of comparison operators allowed?
> >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> >> guessing its relatively arbitrary given how you've massaged the
> >> existing branch-on-bit patterns from the aarch backend.
> > It can be any expression in theory. However in practical terms we
> > usually force the values to registers before calling the expansion.
> > My assumption is that this is for CSE purposes but that's only a guess.
> 
> Understood.  And generally yes, forcing expressions into regs is good for CSE.
> 
> 
> >
> >> Do we have enough information lying around from Ranger to avoid the
> need
> >> to walk the def-use chain to discover that we're masking off all but one
> bit?
> >>
> > That's an interesting thought.  I'll try to see if I can figure out how to query
> > Ranger here.  It would be nice to do so here.
> 
> Reach out to Aldy, I suspect he can probably give you the necessary
> pseudocode pretty quickly.
> 
> 
> Jeff
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-02  9:55       ` Tamar Christina
@ 2022-11-02 11:08         ` Aldy Hernandez
  2022-11-05 14:23           ` Richard Biener
  0 siblings, 1 reply; 33+ messages in thread
From: Aldy Hernandez @ 2022-11-02 11:08 UTC (permalink / raw)
  To: Tamar Christina; +Cc: Jeff Law, gcc-patches, nd, rguenther, MacLeod, Andrew

On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
>
> Hi Aldy,
>
> I'm trying to use Ranger to determine if a range of an expression is a single bit.
>
> If possible in case of a mask then also the position of the bit that's being checked by the mask (or the mask itself).

Just instantiate a ranger, and ask for the range of an SSA name (or an
arbitrary tree expression) at a particular gimple statement (or an
edge):

gimple_ranger ranger;
int_range_max r;
if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
  // do stuff with range "r"
  if (r.singleton_p ()) {
    wide_int num = r.lower_bound ();
    // Check the bits in NUM, etc...
  }
}

You can see the full ranger API in gimple-range.h.

Note that instantiating a new ranger is relatively lightweight, but
it's not free.  So unless you're calling range_of_expr sporadically,
you probably want to have one instance for your pass.  You can pass
around the gimple_ranger around your pass.  Another way of doing this
is calling enable_ranger() at pass start, and then doing:

  get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));

gimple-loop-versioning.cc has an example of using enable_ranger /
disable_ranger.

I am assuming you are interested in ranges for integers / pointers.
Otherwise (floats, etc) you'd have to use "Value_Range" instead of
int_range_max.  I can give you examples on that if necessary.

Let me know if that helps.
Aldy

>
> Do you have any pointers/existing code I can look at to do this?
>
> Kind regards,
> Tamar
>
> > -----Original Message-----
> > From: Jeff Law <jeffreyalaw@gmail.com>
> > Sent: Tuesday, November 1, 2022 5:00 PM
> > To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> > Cc: nd <nd@arm.com>; rguenther@suse.de
> > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > for bit-test-and-branch operations
> >
> >
> > On 11/1/22 09:53, Tamar Christina wrote:
> > >>
> > >>>    from the machine description.
> > >>>
> > >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> > combined
> > >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> > >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > >>> +the bit position of Operand 1 to test.
> > >>> +Operand 3 is the @code{code_label} to jump to.
> > >> Should we refine/document the set of comparison operators allowed?
> > >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> > >> guessing its relatively arbitrary given how you've massaged the
> > >> existing branch-on-bit patterns from the aarch backend.
> > > It can be any expression in theory. However in practical terms we
> > > usually force the values to registers before calling the expansion.
> > > My assumption is that this is for CSE purposes but that's only a guess.
> >
> > Understood.  And generally yes, forcing expressions into regs is good for CSE.
> >
> >
> > >
> > >> Do we have enough information lying around from Ranger to avoid the
> > need
> > >> to walk the def-use chain to discover that we're masking off all but one
> > bit?
> > >>
> > > That's an interesting thought.  I'll try to see if I can figure out how to query
> > > Ranger here.  It would be nice to do so here.
> >
> > Reach out to Aldy, I suspect he can probably give you the necessary
> > pseudocode pretty quickly.
> >
> >
> > Jeff
> >
>


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-02 11:08         ` Aldy Hernandez
@ 2022-11-05 14:23           ` Richard Biener
  2022-11-14 15:56             ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Biener @ 2022-11-05 14:23 UTC (permalink / raw)
  To: Aldy Hernandez
  Cc: Tamar Christina, Jeff Law, gcc-patches, nd, MacLeod, Andrew

On Wed, 2 Nov 2022, Aldy Hernandez wrote:

> On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
> >
> > Hi Aldy,
> >
> > I'm trying to use Ranger to determine if a range of an expression is a single bit.
> >
> > If possible in case of a mask then also the position of the bit that's being checked by the mask (or the mask itself).
> 
> Just instantiate a ranger, and ask for the range of an SSA name (or an
> arbitrary tree expression) at a particular gimple statement (or an
> edge):
> 
> gimple_ranger ranger;
> int_range_max r;
> if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
>   // do stuff with range "r"
>   if (r.singleton_p ()) {
>     wide_int num = r.lower_bound ();
>     // Check the bits in NUM, etc...
>   }
> }
> 
> You can see the full ranger API in gimple-range.h.
> 
> Note that instantiating a new ranger is relatively lightweight, but
> it's not free.  So unless you're calling range_of_expr sporadically,
> you probably want to have one instance for your pass.  You can pass
> around the gimple_ranger around your pass.  Another way of doing this
> is calling enable_ranger() at pass start, and then doing:
> 
>   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> 
> gimple-loop-versioning.cc has an example of using enable_ranger /
> disable_ranger.
> 
> I am assuming you are interested in ranges for integers / pointers.
> Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> int_range_max.  I can give you examples on that if necessary.
> 
> Let me know if that helps.

I think you maybe just want get_nonzero_bits?

> Aldy
> 
> >
> > Do you have any pointers/existing code I can look at to do this?
> >
> > Kind regards,
> > Tamar
> >
> > > -----Original Message-----
> > > From: Jeff Law <jeffreyalaw@gmail.com>
> > > Sent: Tuesday, November 1, 2022 5:00 PM
> > > To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> > > Cc: nd <nd@arm.com>; rguenther@suse.de
> > > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > > for bit-test-and-branch operations
> > >
> > >
> > > On 11/1/22 09:53, Tamar Christina wrote:
> > > >>
> > > >>>    from the machine description.
> > > >>>
> > > >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > > >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> > > combined
> > > >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> > > >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > > >>> +the bit position of Operand 1 to test.
> > > >>> +Operand 3 is the @code{code_label} to jump to.
> > > >> Should we refine/document the set of comparison operators allowed?
> > > >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> > > >> guessing its relatively arbitrary given how you've massaged the
> > > >> existing branch-on-bit patterns from the aarch backend.
> > > > It can be any expression in theory. However in practical terms we
> > > > usually force the values to registers before calling the expansion.
> > > > My assumption is that this is for CSE purposes but that's only a guess.
> > >
> > > Understood.  And generally yes, forcing expressions into regs is good for CSE.
> > >
> > >
> > > >
> > > >> Do we have enough information lying around from Ranger to avoid the
> > > need
> > > >> to walk the def-use chain to discover that we're masking off all but one
> > > bit?
> > > >>
> > > > That's an interesting thought.  I'll try to see if I can figure out how to query
> > > > Ranger here.  It would be nice to do so here.
> > >
> > > Reach out to Aldy, I suspect he can probably give you the necessary
> > > pseudocode pretty quickly.
> > >
> > >
> > > Jeff
> > >
> >
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH]AArch64 Extend umov and sbfx patterns.
  2022-10-31 12:26   ` Richard Sandiford
@ 2022-11-11 14:42     ` Tamar Christina
  2022-11-15 11:10       ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-11 14:42 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

[-- Attachment #1: Type: text/plain, Size: 7893 bytes --]

Hi,

> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -4259,7 +4259,7 @@ (define_insn
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
> >  ;; Extracting lane zero is split into a simple move when it is
> > between SIMD  ;; registers or a store.
> >  (define_insn_and_split "aarch64_get_lane<mode>"
> > -  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand"
> > "=?r, w, Utv")
> > +  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand"
> > + "=r, w, Utv")
> >  	(vec_select:<VEL>
> >  	  (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
> >  	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
> 
> Which testcase does this help with?  It didn't look like the new tests do any
> vector stuff.
> 

Right, sorry about that, splitting up my patches resulted in this sneaking in from a different series.
Moved now.

> > -(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
> > +(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
> >    [(set (match_operand:GPI 0 "register_operand" "=r")
> >  	(ANY_EXTEND:GPI
> > -	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
> > +	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
> >  		       (match_operand 2 "const_int_operand" "n"))))]
> > -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> > +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
> 
> It'd be better to avoid even defining si<-si or si<-di "extensions"
> (even though nothing should try to match them), so how about adding:
> 
>   <GPI:sizen> > <ALLX:sizen> &&
> 
> or similar to the beginning of the condition?  The conditions for the invalid
> combos will then be provably false at compile time and the patterns will be
> compiled out.
> 

Done.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md
	(*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>): Renamed to...
	(*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>): ...this.
	(*zero_extend<GPI:mode>_lshr<SHORT:mode>): Renamed to...
	(*zero_extend<GPI:mode>_lshr<ALLX:mode>): ...this.
	(*extend<GPI:mode>_ashr<SHORT:mode>): Rename to...
	(*extend<GPI:mode>_ashr<ALLX:mode>): ...this.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/bitmove_1.c: New test.
	* gcc.target/aarch64/bitmove_2.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d7684c93fba5b717d568e1a4fd712bde55c7c72e..d230bbb833f97813c8371aa07b587bd8b0292cee 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5711,40 +5711,43 @@ (define_insn "*extrsi5_insn_di"
   [(set_attr "type" "rotate_imm")]
 )
 
-(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
+(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(ANY_EXTEND:GPI
-	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
+	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
 		       (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
+(define_insn "*zero_extend<GPI:mode>_lshr<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (lshiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			(match_operand 2 "const_int_operand" "n"))))]
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
+(define_insn "*extend<GPI:mode>_ashr<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(sign_extend:GPI
-	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (ashiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			(match_operand 2 "const_int_operand" "n"))))]
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ea4265f55213d7e7e5193a3a3681c9350867b50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	asr	x0, x0, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x >> 16;
+}
+
+/*
+** ufoo6:
+** 	lsr	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x >> 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfx	w0, w0, 7, 9
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x >> 7;
+}
+
+/*
+** ufoo6h:
+** 	ubfx	w0, w0, 4, 4
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x >> 4;
+}
+
+/*
+** sfoo62:
+** 	sbfx	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** ufoo62:
+** 	lsr	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** sfoo63:
+** 	sbfx	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x >> 10;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..329600cb3dbecf4cdfed994f6cfdf98ab77e8a01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	sbfiz	x0, x0, 16, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x << 16;
+}
+
+/*
+** ufoo6:
+** 	lsl	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x << 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfiz	w0, w0, 7, 16
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x << 7;
+}
+
+/*
+** ufoo6h:
+** 	...
+** 	ubfiz	w0, w0, 4, 12
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x << 4;
+}
+
+/*
+** sfoo62:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x << 10;
+}
+
+/*
+** ufoo62:
+** 	lsl	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x << 10;
+}
+
+/*
+** sfoo63:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x << 10;
+}

[-- Attachment #2: rb16488 (1).patch --]
[-- Type: application/octet-stream, Size: 5137 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d7684c93fba5b717d568e1a4fd712bde55c7c72e..d230bbb833f97813c8371aa07b587bd8b0292cee 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5711,40 +5711,43 @@ (define_insn "*extrsi5_insn_di"
   [(set_attr "type" "rotate_imm")]
 )
 
-(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
+(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(ANY_EXTEND:GPI
-	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
+	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
 		       (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
+(define_insn "*zero_extend<GPI:mode>_lshr<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(zero_extend:GPI
-	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (lshiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			(match_operand 2 "const_int_operand" "n"))))]
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
 )
 
-(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
+(define_insn "*extend<GPI:mode>_ashr<ALLX:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(sign_extend:GPI
-	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
-			 (match_operand 2 "const_int_operand" "n"))))]
-  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
+	 (ashiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
+			(match_operand 2 "const_int_operand" "n"))))]
+  "<GPI:sizen> > <ALLX:sizen>
+   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
 {
-  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
+  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
   return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
   [(set_attr "type" "bfx")]
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ea4265f55213d7e7e5193a3a3681c9350867b50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	asr	x0, x0, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x >> 16;
+}
+
+/*
+** ufoo6:
+** 	lsr	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x >> 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfx	w0, w0, 7, 9
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x >> 7;
+}
+
+/*
+** ufoo6h:
+** 	ubfx	w0, w0, 4, 4
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x >> 4;
+}
+
+/*
+** sfoo62:
+** 	sbfx	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** ufoo62:
+** 	lsr	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x >> 10;
+}
+
+/*
+** sfoo63:
+** 	sbfx	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x >> 10;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..329600cb3dbecf4cdfed994f6cfdf98ab77e8a01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdint.h>
+
+/*
+** sfoo6:
+** 	sbfiz	x0, x0, 16, 16
+** 	ret
+*/
+int64_t sfoo6 (int32_t x)
+{
+  return x << 16;
+}
+
+/*
+** ufoo6:
+** 	lsl	w0, w0, 30
+** 	ret
+*/
+uint64_t ufoo6 (uint32_t x)
+{
+  return x << 30;
+}
+
+/*
+** ufoo6s:
+** 	ubfiz	w0, w0, 7, 16
+** 	ret
+*/
+uint32_t ufoo6s (uint16_t x)
+{
+  return x << 7;
+}
+
+/*
+** ufoo6h:
+** 	...
+** 	ubfiz	w0, w0, 4, 12
+** 	ret
+*/
+uint16_t ufoo6h (uint8_t x)
+{
+  return x << 4;
+}
+
+/*
+** sfoo62:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo62 (int32_t x)
+{
+  return x << 10;
+}
+
+/*
+** ufoo62:
+** 	lsl	w0, w0, 10
+** 	ret
+*/
+uint64_t ufoo62 (uint32_t x)
+{
+  return x << 10;
+}
+
+/*
+** sfoo63:
+** 	sbfiz	x0, x0, 10, 22
+** 	ret
+*/
+int64_t sfoo63 (int32_t x)
+{
+  return x << 10;
+}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-05 14:23           ` Richard Biener
@ 2022-11-14 15:56             ` Tamar Christina
  2022-11-14 16:22               ` Jeff Law
  2022-11-15  7:33               ` Richard Biener
  0 siblings, 2 replies; 33+ messages in thread
From: Tamar Christina @ 2022-11-14 15:56 UTC (permalink / raw)
  To: Richard Biener, Aldy Hernandez; +Cc: Jeff Law, gcc-patches, nd, MacLeod, Andrew

[-- Attachment #1: Type: text/plain, Size: 17063 bytes --]

> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Saturday, November 5, 2022 2:23 PM
> To: Aldy Hernandez <aldyh@redhat.com>
> Cc: Tamar Christina <Tamar.Christina@arm.com>; Jeff Law
> <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org; nd <nd@arm.com>;
> MacLeod, Andrew <amacleod@redhat.com>
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> On Wed, 2 Nov 2022, Aldy Hernandez wrote:
> 
> > On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina
> <Tamar.Christina@arm.com> wrote:
> > >
> > > Hi Aldy,
> > >
> > > I'm trying to use Ranger to determine if a range of an expression is a
> single bit.
> > >
> > > If possible in case of a mask then also the position of the bit that's being
> checked by the mask (or the mask itself).
> >
> > Just instantiate a ranger, and ask for the range of an SSA name (or an
> > arbitrary tree expression) at a particular gimple statement (or an
> > edge):
> >
> > gimple_ranger ranger;
> > int_range_max r;
> > if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
> >   // do stuff with range "r"
> >   if (r.singleton_p ()) {
> >     wide_int num = r.lower_bound ();
> >     // Check the bits in NUM, etc...
> >   }
> > }
> >
> > You can see the full ranger API in gimple-range.h.
> >
> > Note that instantiating a new ranger is relatively lightweight, but
> > it's not free.  So unless you're calling range_of_expr sporadically,
> > you probably want to have one instance for your pass.  You can pass
> > the gimple_ranger around your pass.  Another way of doing this
> > is calling enable_ranger() at pass start, and then doing:
> >
> >   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> >
> > gimple-loop-versioning.cc has an example of using enable_ranger /
> > disable_ranger.
> >
> > I am assuming you are interested in ranges for integers / pointers.
> > Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> > int_range_max.  I can give you examples on that if necessary.
> >
> > Let me know if that helps.

It did! I ended up going with Richi's suggestion, but the snippet was very helpful
for a different range-based patch I'm trying a prototype for.

Many thanks for the example!

> 
> I think you maybe just want get_nonzero_bits?

Ah, looks like that uses range info as well.  Thanks!

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch$a4): New.
	* doc/md.texi (tbranch@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch ---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
+#include "ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+enum insn_code
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val || TREE_CODE (val) != SSA_NAME)
+    return CODE_FOR_nothing;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return CODE_FOR_nothing;
+
+  /* If the target supports the testbit comparison directly, great.  */
+  auto icode = direct_optab_handler (tbranch_optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return icode;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return icode;
+    }
+
+  wide_int wcst = get_nonzero_bits (val);
+  if (wcst == -1)
+    return CODE_FOR_nothing;
+
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return CODE_FOR_nothing;
+
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return icode;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
@@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing)
+    {
+      emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+      return;
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-

+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);

[-- Attachment #2: rb16485.patch --]
[-- Type: application/octet-stream, Size: 13509 bytes --]

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   VAL, if non-NULL, is the tree expression from which the non-zero operand
+   was derived; it is used to recognize single-bit tests for tbranch.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
+#include "ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns the instruction code of the tbranch pattern if the operation
+   succeeds, with PMODE and PTEST updated, and CODE_FOR_nothing otherwise.  */
+
+static enum insn_code
+validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val || TREE_CODE (val) != SSA_NAME)
+    return CODE_FOR_nothing;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return CODE_FOR_nothing;
+
+  /* If the target supports the testbit comparison directly, great.  */
+  auto icode = direct_optab_handler (tbranch_optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return icode;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return icode;
+    }
+
+  wide_int wcst = get_nonzero_bits (val);
+  if (wcst == -1)
+    return CODE_FOR_nothing;
+
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return CODE_FOR_nothing;
+
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return icode;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
@@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing)
+    {
+      emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+      return;
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-\f
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-10-31 11:53 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
@ 2022-11-14 15:58   ` Tamar Christina
  2022-11-15 10:36     ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-14 15:58 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches
  Cc: Richard Earnshaw, nd, Richard Sandiford, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 4194 bytes --]

Hello,

Ping and updated patch.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch<mode>4): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd712bde55c7c72e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
+  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}

[-- Attachment #2: rb16486.patch --]
[-- Type: application/octet-stream, Size: 3634 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd712bde55c7c72e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
+  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-14 15:56             ` Tamar Christina
@ 2022-11-14 16:22               ` Jeff Law
  2022-11-15  7:33               ` Richard Biener
  1 sibling, 0 replies; 33+ messages in thread
From: Jeff Law @ 2022-11-14 16:22 UTC (permalink / raw)
  To: Tamar Christina, Richard Biener, Aldy Hernandez
  Cc: gcc-patches, nd, MacLeod, Andrew


On 11/14/22 08:56, Tamar Christina wrote:
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch$a4): New.
> 	* doc/md.texi (tbranch@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload.
> 	* tree.h (tree_zero_one_valued_p): New.

OK.

jeff


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-14 15:56             ` Tamar Christina
  2022-11-14 16:22               ` Jeff Law
@ 2022-11-15  7:33               ` Richard Biener
  2022-12-01 16:29                 ` Tamar Christina
  1 sibling, 1 reply; 33+ messages in thread
From: Richard Biener @ 2022-11-15  7:33 UTC (permalink / raw)
  To: Tamar Christina
  Cc: Richard Biener, Aldy Hernandez, Jeff Law, gcc-patches, nd,
	MacLeod, Andrew

On Mon, Nov 14, 2022 at 4:57 PM Tamar Christina via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> > -----Original Message-----
> > From: Richard Biener <rguenther@suse.de>
> > Sent: Saturday, November 5, 2022 2:23 PM
> > To: Aldy Hernandez <aldyh@redhat.com>
> > Cc: Tamar Christina <Tamar.Christina@arm.com>; Jeff Law
> > <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org; nd <nd@arm.com>;
> > MacLeod, Andrew <amacleod@redhat.com>
> > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > for bit-test-and-branch operations
> >
> > On Wed, 2 Nov 2022, Aldy Hernandez wrote:
> >
> > > On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina
> > <Tamar.Christina@arm.com> wrote:
> > > >
> > > > Hi Aldy,
> > > >
> > > > I'm trying to use Ranger to determine if a range of an expression is a
> > single bit.
> > > >
> > > > If possible in case of a mask then also the position of the bit that's being
> > checked by the mask (or the mask itself).
> > >
> > > Just instantiate a ranger, and ask for the range of an SSA name (or an
> > > arbitrary tree expression) at a particular gimple statement (or an
> > > edge):
> > >
> > > gimple_ranger ranger;
> > > int_range_max r;
> > > if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
> > >   // do stuff with range "r"
> > >   if (r.singleton_p ()) {
> > >     wide_int num = r.lower_bound ();
> > >     // Check the bits in NUM, etc...
> > >   }
> > > }
> > >
> > > You can see the full ranger API in gimple-range.h.
> > >
> > > Note that instantiating a new ranger is relatively lightweight, but
> > > it's not free.  So unless you're calling range_of_expr sporadically,
> > > you probably want to have one instance for your pass.  You can pass
> > > around the gimple_ranger around your pass.  Another way of doing this
> > > is calling enable_ranger() at pass start, and then doing:
> > >
> > >   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> > >
> > > gimple-loop-versioning.cc has an example of using enable_ranger /
> > > disable_ranger.
> > >
> > > I am assuming you are interested in ranges for integers / pointers.
> > > Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> > > int_range_max.  I can give you examples on that if necessary.
> > >
> > > Let me know if that helps.
>
> It Did! I ended up going with Richi's suggestion, but the snippet was very helpful
> for a different range based patch I'm trying a prototype for.
>
> Many thanks for the example!
>
> >
> > I think you maybe just want get_nonzero_bits?
>
> Ah, looks like that uses range info as well.  Thanks!
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * dojump.cc (do_jump): Pass along value.
>         (do_jump_by_parts_greater_rtx): Likewise.
>         (do_jump_by_parts_zero_rtx): Likewise.
>         (do_jump_by_parts_equality_rtx): Likewise.
>         (do_compare_rtx_and_jump): Likewise.
>         (do_compare_and_jump): Likewise.
>         * dojump.h (do_compare_rtx_and_jump): New.
>         * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
>         (validate_test_and_branch): New.
>         (emit_cmp_and_jump_insns): Optionally take a value, and when value is
>         supplied then check if it's suitable for tbranch.
>         * optabs.def (tbranch$a4): New.
>         * doc/md.texi (tbranch@var{mode}4): Document it.
>         * optabs.h (emit_cmp_and_jump_insns): New overload.
>         * tree.h (tree_zero_one_valued_p): New.
>
> --- inline copy of patch ---
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>
> +@cindex @code{tbranch@var{mode}4} instruction pattern
> +@item @samp{tbranch@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to.
> +
>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>                          profile_probability);
>
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +                                    machine_mode, rtx, rtx_code_label *,
> +                                    rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>                                      machine_mode, rtx, rtx_code_label *,
>                                      rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>         }
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>                                NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -                              GET_MODE (temp), NULL_RTX,
> +                              exp, GET_MODE (temp), NULL_RTX,
>                                if_false_label, if_true_label, prob);
>      }
>
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -                              word_mode, NULL_RTX, NULL, if_true_label,
> +                              NULL, word_mode, NULL_RTX, NULL, if_true_label,
>                                prob);
>
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>         break;
>
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -                              NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +                              word_mode, NULL_RTX, NULL, if_false_label,
>                                prob.invert ());
>      }
>
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>                                NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +                            const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>                              if_false_label, NULL, prob);
>
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +                            operand_subword_force (op1, i, mode),
> +                            EQ, 0, NULL, word_mode, NULL_RTX,
>                              if_false_label, NULL, prob);
>
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                          rtx_code_label *if_false_label,
>                          rtx_code_label *if_true_label,
>                          profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +                         if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +                        tree val, machine_mode mode, rtx size,
> +                        rtx_code_label *if_false_label,
> +                        rtx_code_label *if_true_label,
> +                        profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                     }
>                   else
>                     dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -                                          size, dest_label, NULL, first_prob);
> +
> +                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +                                          val, mode, size, dest_label, NULL,
> +                                          first_prob);
>                 }
>               /* For !and_them we want to split:
>                  if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>                 {
>                   profile_probability first_prob = prob.split (cprob);
> -                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -                                          size, NULL, if_true_label, first_prob);
> +                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +                                          val, mode, size, NULL,
> +                                          if_true_label, first_prob);
>                   if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>                     {
>                       /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>             }
>         }
>
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>                                if_true_label, prob);
>      }
>
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +                          ((mode == BLKmode)
> +                           ? expr_size (treeop0) : NULL_RTX),
>                            if_false_label, if_true_label, prob);
>  }
>
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>                                    machine_mode *);
> @@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -                         profile_probability prob)
> +                         direct_optab cmp_optab, profile_probability prob)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>
>    gcc_assert (icode != CODE_FOR_nothing);
>    gcc_assert (insn_operand_matches (icode, 0, test));
> @@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */
> +
> +enum insn_code
> +static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +
> +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> +    return CODE_FOR_nothing;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (tbranch_optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;

Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode > word_mode?

> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -                        machine_mode mode, int unsignedp, rtx label,
> +                        machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>                     &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
>  }
>
> -
>
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +                        machine_mode mode, int unsignedp, rtx label,
> +                        profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +                          label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_optab, "tbranch$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>                                      machine_mode, int, rtx,
>                                      profile_probability prob
>                                         = profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +                                    machine_mode, int, tree, rtx,
> +                                    profile_probability prob
> +                                       = profile_probability::uninitialized ());
>
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-14 15:58   ` Tamar Christina
@ 2022-11-15 10:36     ` Richard Sandiford
  2022-11-15 10:42       ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-11-15 10:36 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
> Hello,
>
> Ping and updated patch.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>         (tbranch<mode>4): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/tbz_1.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd712bde55c7c72e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>                       (const_int 1)))]
>  )
>
> -(define_insn "*tb<optab><mode>1"
> +(define_expand "tbranch<mode>4"
>    [(set (pc) (if_then_else
> -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
> -                                   (const_int 1)
> -                                   (match_operand 1
> -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> +               (match_operator 0 "aarch64_comparison_operator"
> +                [(match_operand:ALLI 1 "register_operand")
> +                 (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
> +               (label_ref (match_operand 3 "" ""))
> +               (pc)))]
> +  "optimize > 0"

Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.

I think the split here shows the difficulty with having a single optab
and a comparison operator though.  operand 0 can be something like:

  (eq x 1)

but we're not comparing x for equality with 1.  We're testing whether
bit 1 is zero.  This means that operand 0 can't be taken literally
and can't be used directly in insn patterns.

In an earlier review, I'd said:

  For the TB instructions (and for other similar instructions that I've
  seen on other architectures) it would be more useful to have a single-bit
  test, with operand 4 specifying the bit position.  Arguably it might then
  be better to have separate eq and ne optabs, to avoid the awkward doubling
  of the operands (operand 1 contains operands 2 and 3).

I think we should do that eq/ne split (sorry for not pushing harder for
it before).

Thanks,
Richard



> +{
> +  rtx bitvalue = gen_reg_rtx (DImode);
> +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
> +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> +  operands[2] = const0_rtx;
> +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
> +                                        operands[2]);
> +})
> +
> +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> +  [(set (pc) (if_then_else
> +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
> +                                    (const_int 1)
> +                                    (match_operand 1
> +                                      "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>                    (const_int 0))
>              (label_ref (match_operand 2 "" ""))
>              (pc)))
> @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
>        {
>         if (get_attr_far_branch (insn) == 1)
>           return aarch64_gen_far_branch (operands, 2, "Ltb",
> -                                        "<inv_tb>\\t%<w>0, %1, ");
> +                                        "<inv_tb>\\t%<ALLI:w>0, %1, ");
>         else
>           {
>             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
> -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>           }
>        }
>      else
> -      return "<tbz>\t%<w>0, %1, %l2";
> +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>    }
>    [(set_attr "type" "branch")
>     (set (attr "length")
> diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> @@ -0,0 +1,95 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdbool.h>
> +
> +void h(void);
> +
> +/*
> +** g1:
> +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> +**     ret
> +**     ...
> +*/
> +void g1(bool x)
> +{
> +  if (__builtin_expect (x, 0))
> +    h ();
> +}
> +
> +/*
> +** g2:
> +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> +**     b       h
> +**     ...
> +*/
> +void g2(bool x)
> +{
> +  if (__builtin_expect (x, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_ge:
> +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_ge(int x)
> +{
> +  if (__builtin_expect (x >= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_gt:
> +**     cmp     w[0-9]+, 0
> +**     ble     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_gt(int x)
> +{
> +  if (__builtin_expect (x > 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_lt:
> +**     tbz     w[0-9]+, #?31, .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_lt(int x)
> +{
> +  if (__builtin_expect (x < 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_le:
> +**     cmp     w[0-9]+, 0
> +**     bgt     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_le(int x)
> +{
> +  if (__builtin_expect (x <= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g5:
> +**     mov     w[0-9]+, 65279
> +**     tst     w[0-9]+, w[0-9]+
> +**     beq     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g5(int x)
> +{
> +  if (__builtin_expect (x & 0xfeff, 1))
> +    h ();
> +}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 10:36     ` Richard Sandiford
@ 2022-11-15 10:42       ` Tamar Christina
  2022-11-15 10:50         ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-15 10:42 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 10:36 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> > Hello,
> >
> > Ping and updated patch.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >         (tbranch<mode>4): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/aarch64/tbz_1.c: New test.
> >
> > --- inline copy of patch ---
> >
> > diff --git a/gcc/config/aarch64/aarch64.md
> > b/gcc/config/aarch64/aarch64.md index
> >
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> 71
> > 2bde55c7c72e 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >                       (const_int 1)))]
> >  )
> >
> > -(define_insn "*tb<optab><mode>1"
> > +(define_expand "tbranch<mode>4"
> >    [(set (pc) (if_then_else
> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
> "r")
> > -                                   (const_int 1)
> > -                                   (match_operand 1
> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> > +               (match_operator 0 "aarch64_comparison_operator"
> > +                [(match_operand:ALLI 1 "register_operand")
> > +                 (match_operand:ALLI 2
> "aarch64_simd_shift_imm_<ALLI:mode>")])
> > +               (label_ref (match_operand 3 "" ""))
> > +               (pc)))]
> > +  "optimize > 0"
> 
> Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.
> 

Hi,

I had explained the reason why in the original patch, just didn't repeat it in the ping:

Instead of emitting the instruction directly I've chosen to expand the pattern using a zero extract and generating the existing pattern for comparisons for two
reasons:

  1. Allows for CSE of the actual comparison.
  2. It looks like the code in expand marks the label as unused and removes it
     if it doesn't see a separate reference to it.

Because of this expansion, though, I disable the pattern at -O0, since we have no combine in that case and so we'd end up with worse code.  I did try emitting the pattern directly, but as mentioned in point 2 above, expand would then kill the label.

Basically, as soon as I emit the pattern directly, the label is immediately marked as dead during expand, for some weird reason.

Tamar.

> I think the split here shows the difficulty with having a single optab and a
> comparison operator though.  operand 0 can be something like:
> 
>   (eq x 1)
> 
> but we're not comparing x for equality with 1.  We're testing whether bit 1 is
> zero.  This means that operand 0 can't be taken literally and can't be used
> directly in insn patterns.
> 
> In an earlier review, I'd said:
> 
>   For the TB instructions (and for other similar instructions that I've
>   seen on other architectures) it would be more useful to have a single-bit
>   test, with operand 4 specifying the bit position.  Arguably it might then
>   be better to have separate eq and ne optabs, to avoid the awkward
> doubling
>   of the operands (operand 1 contains operands 2 and 3).
> 
> I think we should do that eq/ne split (sorry for not pushing harder for it
> before).
> 
> Thanks,
> Richard
> 
> 
> 
> > +{
> > +  rtx bitvalue = gen_reg_rtx (DImode);
> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
> > +(operands[1]), 0);
> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> > +  operands[2] = const0_rtx;
> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
> bitvalue,
> > +                                        operands[2]);
> > +})
> > +
> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> > +  [(set (pc) (if_then_else
> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand"
> "r")
> > +                                    (const_int 1)
> > +                                    (match_operand 1
> > +
> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
> >                    (const_int 0))
> >              (label_ref (match_operand 2 "" ""))
> >              (pc)))
> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
> >        {
> >         if (get_attr_far_branch (insn) == 1)
> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
> > -                                        "<inv_tb>\\t%<w>0, %1, ");
> > +                                        "<inv_tb>\\t%<ALLI:w>0, %1,
> > + ");
> >         else
> >           {
> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
> (operands[1]));
> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
> >           }
> >        }
> >      else
> > -      return "<tbz>\t%<w>0, %1, %l2";
> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
> >    }
> >    [(set_attr "type" "branch")
> >     (set (attr "length")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
> e1
> > a0cff6774463
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > @@ -0,0 +1,95 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
> > +-fno-asynchronous-unwind-tables" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } }
> > +} */
> > +
> > +#include <stdbool.h>
> > +
> > +void h(void);
> > +
> > +/*
> > +** g1:
> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> > +**     ret
> > +**     ...
> > +*/
> > +void g1(bool x)
> > +{
> > +  if (__builtin_expect (x, 0))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g2:
> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> > +**     b       h
> > +**     ...
> > +*/
> > +void g2(bool x)
> > +{
> > +  if (__builtin_expect (x, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_ge:
> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_ge(int x)
> > +{
> > +  if (__builtin_expect (x >= 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_gt:
> > +**     cmp     w[0-9]+, 0
> > +**     ble     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_gt(int x)
> > +{
> > +  if (__builtin_expect (x > 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_lt:
> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_lt(int x)
> > +{
> > +  if (__builtin_expect (x < 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_le:
> > +**     cmp     w[0-9]+, 0
> > +**     bgt     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_le(int x)
> > +{
> > +  if (__builtin_expect (x <= 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g5:
> > +**     mov     w[0-9]+, 65279
> > +**     tst     w[0-9]+, w[0-9]+
> > +**     beq     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g5(int x)
> > +{
> > +  if (__builtin_expect (x & 0xfeff, 1))
> > +    h ();
> > +}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 10:42       ` Tamar Christina
@ 2022-11-15 10:50         ` Richard Sandiford
  2022-11-15 11:00           ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-11-15 10:50 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 10:36 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> > Hello,
>> >
>> > Ping and updated patch.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> > gcc/ChangeLog:
>> >
>> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >         (tbranch<mode>4): New.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >
>> > --- inline copy of patch ---
>> >
>> > diff --git a/gcc/config/aarch64/aarch64.md
>> > b/gcc/config/aarch64/aarch64.md index
>> >
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> 71
>> > 2bde55c7c72e 100644
>> > --- a/gcc/config/aarch64/aarch64.md
>> > +++ b/gcc/config/aarch64/aarch64.md
>> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >                       (const_int 1)))]
>> >  )
>> >
>> > -(define_insn "*tb<optab><mode>1"
>> > +(define_expand "tbranch<mode>4"
>> >    [(set (pc) (if_then_else
>> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
>> "r")
>> > -                                   (const_int 1)
>> > -                                   (match_operand 1
>> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> > +               (match_operator 0 "aarch64_comparison_operator"
>> > +                [(match_operand:ALLI 1 "register_operand")
>> > +                 (match_operand:ALLI 2
>> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> > +               (label_ref (match_operand 3 "" ""))
>> > +               (pc)))]
>> > +  "optimize > 0"
>> 
>> Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.
>> 
>
> Hi,
>
> I had explained the reason why in the original patch, just didn't repeat it in the ping:
>
> Instead of emitting the instruction directly I've chosen to expand the pattern using a zero extract and generating the existing pattern for comparisons for two
> reasons:
>
>   1. Allows for CSE of the actual comparison.
>   2. It looks like the code in expand makes the label as unused and removed it
>      if it doesn't see a separate reference to it.
>
> Because of this expansion though I disable the pattern at -O0 since we have no combine in that case so we'd end up with worse code.  I did try emitting the pattern directly, but as mentioned in no#2 expand would then kill the label.
>
> Basically I emit the pattern directly, immediately during expand the label is marked as dead for some weird reason.

Isn't #2 a bug though?  It seems like something we should fix rather than
work around.

Thanks,
Richard


>
> Tamar.
>
>> I think the split here shows the difficulty with having a single optab and a
>> comparison operator though.  operand 0 can be something like:
>> 
>>   (eq x 1)
>> 
>> but we're not comparing x for equality with 1.  We're testing whether bit 1 is
>> zero.  This means that operand 0 can't be taken literally and can't be used
>> directly in insn patterns.
>> 
>> In an earlier review, I'd said:
>> 
>>   For the TB instructions (and for other similar instructions that I've
>>   seen on other architectures) it would be more useful to have a single-bit
>>   test, with operand 4 specifying the bit position.  Arguably it might then
>>   be better to have separate eq and ne optabs, to avoid the awkward
>> doubling
>>   of the operands (operand 1 contains operands 2 and 3).
>> 
>> I think we should do that eq/ne split (sorry for not pushing harder for it
>> before).
>> 
>> Thanks,
>> Richard
>> 
>> 
>> 
>> > +{
>> > +  rtx bitvalue = gen_reg_rtx (DImode);
>> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
>> > +(operands[1]), 0);
>> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
>> > +  operands[2] = const0_rtx;
>> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
>> bitvalue,
>> > +                                        operands[2]);
>> > +})
>> > +
>> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
>> > +  [(set (pc) (if_then_else
>> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand"
>> "r")
>> > +                                    (const_int 1)
>> > +                                    (match_operand 1
>> > +
>> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>> >                    (const_int 0))
>> >              (label_ref (match_operand 2 "" ""))
>> >              (pc)))
>> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
>> >        {
>> >         if (get_attr_far_branch (insn) == 1)
>> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
>> > -                                        "<inv_tb>\\t%<w>0, %1, ");
>> > +                                        "<inv_tb>\\t%<ALLI:w>0, %1,
>> > + ");
>> >         else
>> >           {
>> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
>> (operands[1]));
>> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
>> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>> >           }
>> >        }
>> >      else
>> > -      return "<tbz>\t%<w>0, %1, %l2";
>> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>> >    }
>> >    [(set_attr "type" "branch")
>> >     (set (attr "length")
>> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > new file mode 100644
>> > index
>> >
>> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
>> e1
>> > a0cff6774463
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > @@ -0,0 +1,95 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
>> > +-fno-asynchronous-unwind-tables" } */
>> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } }
>> > +} */
>> > +
>> > +#include <stdbool.h>
>> > +
>> > +void h(void);
>> > +
>> > +/*
>> > +** g1:
>> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
>> > +**     ret
>> > +**     ...
>> > +*/
>> > +void g1(bool x)
>> > +{
>> > +  if (__builtin_expect (x, 0))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g2:
>> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g2(bool x)
>> > +{
>> > +  if (__builtin_expect (x, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_ge:
>> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_ge(int x)
>> > +{
>> > +  if (__builtin_expect (x >= 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_gt:
>> > +**     cmp     w[0-9]+, 0
>> > +**     ble     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_gt(int x)
>> > +{
>> > +  if (__builtin_expect (x > 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_lt:
>> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_lt(int x)
>> > +{
>> > +  if (__builtin_expect (x < 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_le:
>> > +**     cmp     w[0-9]+, 0
>> > +**     bgt     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_le(int x)
>> > +{
>> > +  if (__builtin_expect (x <= 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g5:
>> > +**     mov     w[0-9]+, 65279
>> > +**     tst     w[0-9]+, w[0-9]+
>> > +**     beq     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g5(int x)
>> > +{
>> > +  if (__builtin_expect (x & 0xfeff, 1))
>> > +    h ();
>> > +}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 10:50         ` Richard Sandiford
@ 2022-11-15 11:00           ` Tamar Christina
  2022-11-15 11:14             ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-15 11:00 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 10:51 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> > Hello,
> >> >
> >> > Ping and updated patch.
> >> >
> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >> >
> >> > Ok for master?
> >> >
> >> > Thanks,
> >> > Tamar
> >> >
> >> > gcc/ChangeLog:
> >> >
> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >         (tbranch<mode>4): New.
> >> >
> >> > gcc/testsuite/ChangeLog:
> >> >
> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >
> >> > --- inline copy of patch ---
> >> >
> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> > b/gcc/config/aarch64/aarch64.md index
> >> >
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> 71
> >> > 2bde55c7c72e 100644
> >> > --- a/gcc/config/aarch64/aarch64.md
> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >                       (const_int 1)))]
> >> >  )
> >> >
> >> > -(define_insn "*tb<optab><mode>1"
> >> > +(define_expand "tbranch<mode>4"
> >> >    [(set (pc) (if_then_else
> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
> >> "r")
> >> > -                                   (const_int 1)
> >> > -                                   (match_operand 1
> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> > +                 (match_operand:ALLI 2
> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> > +               (label_ref (match_operand 3 "" ""))
> >> > +               (pc)))]
> >> > +  "optimize > 0"
> >>
> >> Why's the pattern conditional on optimize?  Seems a valid choice at -O0
> too.
> >>
> >
> > Hi,
> >
> > I had explained the reason why in the original patch, just didn't repeat it in
> the ping:
> >
> > Instead of emitting the instruction directly I've chosen to expand the
> > pattern using a zero extract and generating the existing pattern for
> > comparisons for two
> > reasons:
> >
> >   1. Allows for CSE of the actual comparison.
> >   2. It looks like the code in expand marks the label as unused and removes
> it
> >      if it doesn't see a separate reference to it.
> >
> > Because of this expansion though I disable the pattern at -O0 since we
> have no combine in that case so we'd end up with worse code.  I did try
> emitting the pattern directly, but as mentioned in no#2 expand would then
> kill the label.
> >
> > Basically I emit the pattern directly, immediately during expand the label is
> marked as dead for some weird reason.
> 
> Isn't #2 a bug though?  It seems like something we should fix rather than
> work around.

Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to split the optabs
still? Isn't the problem atm that I need the split?  If I'm emitting the instruction
directly then the recog pattern for it can just be (eq (vec_extract x 1) 0) which is
the correct semantics?

Thanks,
Tamar
> 
> Thanks,
> Richard
> 
> 
> >
> > Tamar.
> >
> >> I think the split here shows the difficulty with having a single
> >> optab and a comparison operator though.  operand 0 can be something
> like:
> >>
> >>   (eq x 1)
> >>
> >> but we're not comparing x for equality with 1.  We're testing whether
> >> bit 1 is zero.  This means that operand 0 can't be taken literally
> >> and can't be used directly in insn patterns.
> >>
> >> In an earlier review, I'd said:
> >>
> >>   For the TB instructions (and for other similar instructions that I've
> >>   seen on other architectures) it would be more useful to have a single-bit
> >>   test, with operand 4 specifying the bit position.  Arguably it might then
> >>   be better to have separate eq and ne optabs, to avoid the awkward
> >> doubling
> >>   of the operands (operand 1 contains operands 2 and 3).
> >>
> >> I think we should do that eq/ne split (sorry for not pushing harder
> >> for it before).
> >>
> >> Thanks,
> >> Richard
> >>
> >>
> >>
> >> > +{
> >> > +  rtx bitvalue = gen_reg_rtx (DImode);
> >> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
> >> > +(operands[1]), 0);
> >> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> >> > +  operands[2] = const0_rtx;
> >> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
> >> bitvalue,
> >> > +                                        operands[2]);
> >> > +})
> >> > +
> >> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> >> > +  [(set (pc) (if_then_else
> >> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0
> "register_operand"
> >> "r")
> >> > +                                    (const_int 1)
> >> > +                                    (match_operand 1
> >> > +
> >> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
> >> >                    (const_int 0))
> >> >              (label_ref (match_operand 2 "" ""))
> >> >              (pc)))
> >> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
> >> >        {
> >> >         if (get_attr_far_branch (insn) == 1)
> >> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
> >> > -                                        "<inv_tb>\\t%<w>0, %1, ");
> >> > +                                        "<inv_tb>\\t%<ALLI:w>0,
> >> > + %1, ");
> >> >         else
> >> >           {
> >> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
> >> (operands[1]));
> >> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> >> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
> >> >           }
> >> >        }
> >> >      else
> >> > -      return "<tbz>\t%<w>0, %1, %l2";
> >> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
> >> >    }
> >> >    [(set_attr "type" "branch")
> >> >     (set (attr "length")
> >> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > new file mode 100644
> >> > index
> >> >
> >>
> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
> >> e1
> >> > a0cff6774463
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > @@ -0,0 +1,95 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
> >> > +-fno-asynchronous-unwind-tables" } */
> >> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } }
> >> > +} } */
> >> > +
> >> > +#include <stdbool.h>
> >> > +
> >> > +void h(void);
> >> > +
> >> > +/*
> >> > +** g1:
> >> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> >> > +**     ret
> >> > +**     ...
> >> > +*/
> >> > +void g1(bool x)
> >> > +{
> >> > +  if (__builtin_expect (x, 0))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g2:
> >> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g2(bool x)
> >> > +{
> >> > +  if (__builtin_expect (x, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_ge:
> >> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_ge(int x)
> >> > +{
> >> > +  if (__builtin_expect (x >= 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_gt:
> >> > +**     cmp     w[0-9]+, 0
> >> > +**     ble     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_gt(int x)
> >> > +{
> >> > +  if (__builtin_expect (x > 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_lt:
> >> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_lt(int x)
> >> > +{
> >> > +  if (__builtin_expect (x < 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_le:
> >> > +**     cmp     w[0-9]+, 0
> >> > +**     bgt     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_le(int x)
> >> > +{
> >> > +  if (__builtin_expect (x <= 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g5:
> >> > +**     mov     w[0-9]+, 65279
> >> > +**     tst     w[0-9]+, w[0-9]+
> >> > +**     beq     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g5(int x)
> >> > +{
> >> > +  if (__builtin_expect (x & 0xfeff, 1))
> >> > +    h ();
> >> > +}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH]AArch64 Extend umov and sbfx patterns.
  2022-11-11 14:42     ` Tamar Christina
@ 2022-11-15 11:10       ` Richard Sandiford
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Sandiford @ 2022-11-15 11:10 UTC (permalink / raw)
  To: Tamar Christina
  Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Tamar Christina <Tamar.Christina@arm.com> writes:
> Hi,
>
>> > --- a/gcc/config/aarch64/aarch64-simd.md
>> > +++ b/gcc/config/aarch64/aarch64-simd.md
>> > @@ -4259,7 +4259,7 @@ (define_insn
>> "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>> >  ;; Extracting lane zero is split into a simple move when it is
>> > between SIMD  ;; registers or a store.
>> >  (define_insn_and_split "aarch64_get_lane<mode>"
>> > -  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand"
>> > "=?r, w, Utv")
>> > +  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand"
>> > + "=r, w, Utv")
>> >  	(vec_select:<VEL>
>> >  	  (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
>> >  	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
>> 
>> Which testcase does this help with?  It didn't look like the new tests do any
>> vector stuff.
>> 
>
> Right, sorry about that, splitting up my patches resulted in this sneaking in from a different series.
> Moved now.
>
>> > -(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
>> > +(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
>> >    [(set (match_operand:GPI 0 "register_operand" "=r")
>> >  	(ANY_EXTEND:GPI
>> > -	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
>> > +	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
>> >  		       (match_operand 2 "const_int_operand" "n"))))]
>> > -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
>> > +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>> 
>> It'd be better to avoid even defining si<-si or si<-di "extensions"
>> (even though nothing should try to match them), so how about adding:
>> 
>>   <GPI:sizen> > <ALLX:sizen> &&
>> 
>> or similar to the beginning of the condition?  The conditions for the invalid
>> combos will then be provably false at compile time and the patterns will be
>> compiled out.
>> 
>
> Done.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.md
> 	(*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>): Renamed to...
> 	(*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>): ...this.
> 	(*zero_extend<GPI:mode>_lshr<SHORT:mode>): Renamed to...
> 	(*zero_extend<GPI:mode>_lshr<ALLX:mode>): ...this.
> 	(*extend<GPI:mode>_ashr<SHORT:mode>): Rename to...
> 	(*extend<GPI:mode>_ashr<ALLX:mode>): ...this.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/bitmove_1.c: New test.
> 	* gcc.target/aarch64/bitmove_2.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index d7684c93fba5b717d568e1a4fd712bde55c7c72e..d230bbb833f97813c8371aa07b587bd8b0292cee 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5711,40 +5711,43 @@ (define_insn "*extrsi5_insn_di"
>    [(set_attr "type" "rotate_imm")]
>  )
>  
> -(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
> +(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(ANY_EXTEND:GPI
> -	 (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
> +	 (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
>  		       (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +  "<GPI:sizen> > <ALLX:sizen>
> +   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )
>  
> -(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
> +(define_insn "*zero_extend<GPI:mode>_lshr<ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(zero_extend:GPI
> -	 (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -			 (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +	 (lshiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +			(match_operand 2 "const_int_operand" "n"))))]
> +  "<GPI:sizen> > <ALLX:sizen>
> +   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )
>  
> -(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
> +(define_insn "*extend<GPI:mode>_ashr<ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>  	(sign_extend:GPI
> -	 (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -			 (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +	 (ashiftrt:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +			(match_operand 2 "const_int_operand" "n"))))]
> +  "<GPI:sizen> > <ALLX:sizen>
> +   && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5ea4265f55213d7e7e5193a3a3681c9350867b50
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +** 	asr	x0, x0, 16
> +** 	ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x >> 16;
> +}
> +
> +/*
> +** ufoo6:
> +** 	lsr	w0, w0, 30
> +** 	ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x >> 30;
> +}
> +
> +/*
> +** ufoo6s:
> +** 	ubfx	w0, w0, 7, 9
> +** 	ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x >> 7;
> +}
> +
> +/*
> +** ufoo6h:
> +** 	ubfx	w0, w0, 4, 4
> +** 	ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x >> 4;
> +}
> +
> +/*
> +** sfoo62:
> +** 	sbfx	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** ufoo62:
> +** 	lsr	w0, w0, 10
> +** 	ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** sfoo63:
> +** 	sbfx	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x >> 10;
> +}

This is the same as sfoo62, not sure if that's intentional.

> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..329600cb3dbecf4cdfed994f6cfdf98ab77e8a01
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +** 	sbfiz	x0, x0, 16, 16
> +** 	ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x << 16;
> +}
> +
> +/*
> +** ufoo6:
> +** 	lsl	w0, w0, 30
> +** 	ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x << 30;
> +}
> +
> +/*
> +** ufoo6s:
> +** 	ubfiz	w0, w0, 7, 16
> +** 	ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x << 7;
> +}
> +
> +/*
> +** ufoo6h:
> +** 	...
> +** 	ubfiz	w0, w0, 4, 12
> +** 	ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x << 4;
> +}

This looks odd without the ... filled in.  It raises the question why
the width is 12 bits when the original type was only 8.

> +
> +/*
> +** sfoo62:
> +** 	sbfiz	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** ufoo62:
> +** 	lsl	w0, w0, 10
> +** 	ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** sfoo63:
> +** 	sbfiz	x0, x0, 10, 22
> +** 	ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x << 10;
> +}

Similarly a dup of sfoo62.

OK with those things fixed, thanks.

Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 11:00           ` Tamar Christina
@ 2022-11-15 11:14             ` Richard Sandiford
  2022-11-15 11:23               ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-11-15 11:14 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 10:51 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> > Hello,
>> >> >
>> >> > Ping and updated patch.
>> >> >
>> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >> >
>> >> > Ok for master?
>> >> >
>> >> > Thanks,
>> >> > Tamar
>> >> >
>> >> > gcc/ChangeLog:
>> >> >
>> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >         (tbranch<mode>4): New.
>> >> >
>> >> > gcc/testsuite/ChangeLog:
>> >> >
>> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >
>> >> > --- inline copy of patch ---
>> >> >
>> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> 71
>> >> > 2bde55c7c72e 100644
>> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >                       (const_int 1)))]
>> >> >  )
>> >> >
>> >> > -(define_insn "*tb<optab><mode>1"
>> >> > +(define_expand "tbranch<mode>4"
>> >> >    [(set (pc) (if_then_else
>> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
>> >> "r")
>> >> > -                                   (const_int 1)
>> >> > -                                   (match_operand 1
>> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> > +                 (match_operand:ALLI 2
>> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> > +               (label_ref (match_operand 3 "" ""))
>> >> > +               (pc)))]
>> >> > +  "optimize > 0"
>> >>
>> >> Why's the pattern conditional on optimize?  Seems a valid choice at -O0
>> too.
>> >>
>> >
>> > Hi,
>> >
>> > I had explained the reason why in the original patch, just didn't repeat it in
>> the ping:
>> >
>> > Instead of emitting the instruction directly I've chosen to expand the
>> > pattern using a zero extract and generating the existing pattern for
>> > comparisons for two
>> > reasons:
>> >
>> >   1. Allows for CSE of the actual comparison.
>> >   2. It looks like the code in expand marks the label as unused and removes
>> it
>> >      if it doesn't see a separate reference to it.
>> >
>> > Because of this expansion though I disable the pattern at -O0 since we
>> have no combine in that case so we'd end up with worse code.  I did try
>> emitting the pattern directly, but as mentioned in no#2 expand would then
>> kill the label.
>> >
>> > Basically I emit the pattern directly, immediately during expand the label is
>> marked as dead for some weird reason.
>> 
>> Isn't #2 a bug though?  It seems like something we should fix rather than
>> work around.
>
> Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to split the optabs
> still? Isn't the problem atm that I need the split?  If I'm emitting the instruction
> directly then the recog pattern for it can just be (eq (vec_extract x 1) 0) which is
> the correct semantics?

What rtx does the code that uses the optab pass for operand 0?

Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 11:14             ` Richard Sandiford
@ 2022-11-15 11:23               ` Tamar Christina
  2022-11-15 11:33                 ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-15 11:23 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:15 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> > Hello,
> >> >> >
> >> >> > Ping and updated patch.
> >> >> >
> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >> >> >
> >> >> > Ok for master?
> >> >> >
> >> >> > Thanks,
> >> >> > Tamar
> >> >> >
> >> >> > gcc/ChangeLog:
> >> >> >
> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename
> to...
> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >         (tbranch<mode>4): New.
> >> >> >
> >> >> > gcc/testsuite/ChangeLog:
> >> >> >
> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >
> >> >> > --- inline copy of patch ---
> >> >> >
> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> 71
> >> >> > 2bde55c7c72e 100644
> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >                       (const_int 1)))]
> >> >> >  )
> >> >> >
> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >    [(set (pc) (if_then_else
> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> "register_operand"
> >> >> "r")
> >> >> > -                                   (const_int 1)
> >> >> > -                                   (match_operand 1
> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> > +                 (match_operand:ALLI 2
> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> > +               (pc)))]
> >> >> > +  "optimize > 0"
> >> >>
> >> >> Why's the pattern conditional on optimize?  Seems a valid choice
> >> >> at -O0
> >> too.
> >> >>
> >> >
> >> > Hi,
> >> >
> >> > I had explained the reason why in the original patch, just didn't
> >> > repeat it in
> >> the ping:
> >> >
> >> > Instead of emitting the instruction directly I've chosen to expand
> >> > the pattern using a zero extract and generating the existing
> >> > pattern for comparisons for two
> >> > reasons:
> >> >
> >> >   1. Allows for CSE of the actual comparison.
> >> >   2. It looks like the code in expand makes the label as unused and
> >> > removed
> >> it
> >> >      if it doesn't see a separate reference to it.
> >> >
> >> > Because of this expansion though I disable the pattern at -O0 since
> >> > we
> >> have no combine in that case so we'd end up with worse code.  I did
> >> try emitting the pattern directly, but as mentioned in no#2 expand
> >> would then kill the label.
> >> >
> >> > Basically I emit the pattern directly, immediately during expand
> >> > the label is
> >> marked as dead for some weird reason.
> >>
> >> Isn't #2 a bug though?  It seems like something we should fix rather
> >> than work around.
> >
> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> > split the optabs still? Isn't the problem atm that I need the split?
> > If I'm emitting the instruction directly then the recog pattern for it
> > can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> 
> What rtx does the code that uses the optab pass for operand 0?

It gets passed the full comparison:

(eq (reg/v:SI 92 [ x ])
    (const_int 0 [0]))

of which we only look at the operator.

Tamar.

> 
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 11:23               ` Tamar Christina
@ 2022-11-15 11:33                 ` Richard Sandiford
  2022-11-15 11:39                   ` Tamar Christina
  2022-11-22 13:48                   ` Tamar Christina
  0 siblings, 2 replies; 33+ messages in thread
From: Richard Sandiford @ 2022-11-15 11:33 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 11:15 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 10:51 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> -----Original Message-----
>> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> Shawcroft
>> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >>
>> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> > Hello,
>> >> >> >
>> >> >> > Ping and updated patch.
>> >> >> >
>> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >> >> >
>> >> >> > Ok for master?
>> >> >> >
>> >> >> > Thanks,
>> >> >> > Tamar
>> >> >> >
>> >> >> > gcc/ChangeLog:
>> >> >> >
>> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename
>> to...
>> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >> >         (tbranch<mode>4): New.
>> >> >> >
>> >> >> > gcc/testsuite/ChangeLog:
>> >> >> >
>> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >> >
>> >> >> > --- inline copy of patch ---
>> >> >> >
>> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >> >
>> >> >>
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> >> 71
>> >> >> > 2bde55c7c72e 100644
>> >> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >> >                       (const_int 1)))]
>> >> >> >  )
>> >> >> >
>> >> >> > -(define_insn "*tb<optab><mode>1"
>> >> >> > +(define_expand "tbranch<mode>4"
>> >> >> >    [(set (pc) (if_then_else
>> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
>> "register_operand"
>> >> >> "r")
>> >> >> > -                                   (const_int 1)
>> >> >> > -                                   (match_operand 1
>> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> >> > +                 (match_operand:ALLI 2
>> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> >> > +               (label_ref (match_operand 3 "" ""))
>> >> >> > +               (pc)))]
>> >> >> > +  "optimize > 0"
>> >> >>
>> >> >> Why's the pattern conditional on optimize?  Seems a valid choice
>> >> >> at -O0
>> >> too.
>> >> >>
>> >> >
>> >> > Hi,
>> >> >
>> >> > I had explained the reason why in the original patch, just didn't
>> >> > repeat it in
>> >> the ping:
>> >> >
>> >> > Instead of emitting the instruction directly I've chosen to expand
>> >> > the pattern using a zero extract and generating the existing
>> >> > pattern for comparisons for two
>> >> > reasons:
>> >> >
>> >> >   1. Allows for CSE of the actual comparison.
>> >> >   2. It looks like the code in expand marks the label as unused and
>> >> > removes
>> >> it
>> >> >      if it doesn't see a separate reference to it.
>> >> >
>> >> > Because of this expansion though I disable the pattern at -O0 since
>> >> > we
>> >> have no combine in that case so we'd end up with worse code.  I did
>> >> try emitting the pattern directly, but as mentioned in no#2 expand
>> >> would then kill the label.
>> >> >
>> >> > Basically I emit the pattern directly, immediately during expand
>> >> > the label is
>> >> marked as dead for some weird reason.
>> >>
>> >> Isn't #2 a bug though?  It seems like something we should fix rather
>> >> than work around.
>> >
>> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
>> > split the optabs still? Isn't the problem atm that I need the split?
>> > If I'm emitting the instruction directly then the recog pattern for it
>> > can just be (eq (vec_extract x 1) 0) which is the correct semantics?
>> 
>> What rtx does the code that uses the optab pass for operand 0?
>
> It gets passed the full comparison:
>
> (eq (reg/v:SI 92 [ x ])
>     (const_int 0 [0]))
>
> of which we only look at the operator.

OK, that's what I thought.  The problem is then the one I mentioned above.
This rtx doesn't describe the operation that the optab is supposed to
perform, so it can never be used in the instruction pattern.  (This is
different from something like cbranch, where operand 0 can be used directly
if the target supports a very general compare-and-branch instruction.)

If we want to use a single optab, the code that generates the optab should
pass something like:

  (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))

as operand 0, so that operand 0 specifies the real test condition.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 11:33                 ` Richard Sandiford
@ 2022-11-15 11:39                   ` Tamar Christina
  2022-11-22 13:48                   ` Tamar Christina
  1 sibling, 0 replies; 33+ messages in thread
From: Tamar Christina @ 2022-11-15 11:39 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:34 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 11:15 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> -----Original Message-----
> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> Shawcroft
> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >>
> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> > Hello,
> >> >> >> >
> >> >> >> > Ping and updated patch.
> >> >> >> >
> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
> issues.
> >> >> >> >
> >> >> >> > Ok for master?
> >> >> >> >
> >> >> >> > Thanks,
> >> >> >> > Tamar
> >> >> >> >
> >> >> >> > gcc/ChangeLog:
> >> >> >> >
> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
> >> >> >> > Rename
> >> to...
> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >> >         (tbranch<mode>4): New.
> >> >> >> >
> >> >> >> > gcc/testsuite/ChangeLog:
> >> >> >> >
> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >> >
> >> >> >> > --- inline copy of patch ---
> >> >> >> >
> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >> >
> >> >> >>
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> >> 71
> >> >> >> > 2bde55c7c72e 100644
> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >> >                       (const_int 1)))]
> >> >> >> >  )
> >> >> >> >
> >> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >> >    [(set (pc) (if_then_else
> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> >> "register_operand"
> >> >> >> "r")
> >> >> >> > -                                   (const_int 1)
> >> >> >> > -                                   (match_operand 1
> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> >> > +                 (match_operand:ALLI 2
> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> >> > +               (pc)))]
> >> >> >> > +  "optimize > 0"
> >> >> >>
> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
> >> >> >> choice at -O0
> >> >> too.
> >> >> >>
> >> >> >
> >> >> > Hi,
> >> >> >
> >> >> > I had explained the reason why in the original patch, just
> >> >> > didn't repeat it in
> >> >> the ping:
> >> >> >
> >> >> > Instead of emitting the instruction directly I've chosen to
> >> >> > expand the pattern using a zero extract and generating the
> >> >> > existing pattern for comparisons for two
> >> >> > reasons:
> >> >> >
> >> >> >   1. Allows for CSE of the actual comparison.
> >> >> >   2. It looks like the code in expand makes the label as unused
> >> >> > and removed
> >> >> it
> >> >> >      if it doesn't see a separate reference to it.
> >> >> >
> >> >> > Because of this expansion though I disable the pattern at -O0
> >> >> > since we
> >> >> have no combine in that case so we'd end up with worse code.  I
> >> >> did try emitting the pattern directly, but as mentioned in no#2
> >> >> expand would then kill the label.
> >> >> >
> >> >> > Basically I emit the pattern directly, immediately during expand
> >> >> > the label is
> >> >> marked as dead for some weird reason.
> >> >>
> >> >> Isn't #2 a bug though?  It seems like something we should fix
> >> >> rather than work around.
> >> >
> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> >> > split the optabs still? Isn't the problem atm that I need the split?
> >> > If I'm emitting the instruction directly then the recog pattern for
> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> >>
> >> What rtx does the code that uses the optab pass for operand 0?
> >
> > It gets passed the full comparison:
> >
> > (eq (reg/v:SI 92 [ x ])
> >     (const_int 0 [0]))
> >
> > of which we only look at the operator.
> 
> OK, that's what I thought.  The problem is then the one I mentioned above.
> This rtx doesn't describe the operation that the optab is supposed to
> perform, so it can never be used in the instruction pattern.  (This is different
> from something like cbranch, where operand 0 can be used directly if the
> target supports a very general compare-and-branch instruction.)
> 
> If we want to use a single optab, the code that generates the optab should
> pass something like:
> 
>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
> 
> as operand 0, so that operand 0 specifies the real test condition.

Ok, I guess you're worried about the generic case, as another target could
use operand0 as-is rather than looking at the operator only like we do.

I think I'd rather change the RTX expression, as I do so anyway to add the pos.
This way I avoid another back and forth about the generic optab in the mid-end.

So I'll change the RTX, thanks!

> 
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-15 11:33                 ` Richard Sandiford
  2022-11-15 11:39                   ` Tamar Christina
@ 2022-11-22 13:48                   ` Tamar Christina
  2022-11-22 14:00                     ` Richard Sandiford
  1 sibling, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-22 13:48 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:34 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 11:15 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> -----Original Message-----
> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> Shawcroft
> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >>
> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> > Hello,
> >> >> >> >
> >> >> >> > Ping and updated patch.
> >> >> >> >
> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
> issues.
> >> >> >> >
> >> >> >> > Ok for master?
> >> >> >> >
> >> >> >> > Thanks,
> >> >> >> > Tamar
> >> >> >> >
> >> >> >> > gcc/ChangeLog:
> >> >> >> >
> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
> >> >> >> > Rename
> >> to...
> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >> >         (tbranch<mode>4): New.
> >> >> >> >
> >> >> >> > gcc/testsuite/ChangeLog:
> >> >> >> >
> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >> >
> >> >> >> > --- inline copy of patch ---
> >> >> >> >
> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >> >
> >> >> >>
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> >> 71
> >> >> >> > 2bde55c7c72e 100644
> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >> >                       (const_int 1)))]
> >> >> >> >  )
> >> >> >> >
> >> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >> >    [(set (pc) (if_then_else
> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> >> "register_operand"
> >> >> >> "r")
> >> >> >> > -                                   (const_int 1)
> >> >> >> > -                                   (match_operand 1
> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> >> > +                 (match_operand:ALLI 2
> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> >> > +               (pc)))]
> >> >> >> > +  "optimize > 0"
> >> >> >>
> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
> >> >> >> choice at -O0
> >> >> too.
> >> >> >>
> >> >> >
> >> >> > Hi,
> >> >> >
> >> >> > I had explained the reason why in the original patch, just
> >> >> > didn't repeat it in
> >> >> the ping:
> >> >> >
> >> >> > Instead of emitting the instruction directly I've chosen to
> >> >> > expand the pattern using a zero extract and generating the
> >> >> > existing pattern for comparisons for two
> >> >> > reasons:
> >> >> >
> >> >> >   1. Allows for CSE of the actual comparison.
> >> >> >   2. It looks like the code in expand makes the label as unused
> >> >> > and removed
> >> >> it
> >> >> >      if it doesn't see a separate reference to it.
> >> >> >
> >> >> > Because of this expansion though I disable the pattern at -O0
> >> >> > since we
> >> >> have no combine in that case so we'd end up with worse code.  I
> >> >> did try emitting the pattern directly, but as mentioned in no#2
> >> >> expand would then kill the label.
> >> >> >
> >> >> > Basically I emit the pattern directly, immediately during expand
> >> >> > the label is
> >> >> marked as dead for some weird reason.
> >> >>
> >> >> Isn't #2 a bug though?  It seems like something we should fix
> >> >> rather than work around.
> >> >
> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> >> > split the optabs still? Isn't the problem atm that I need the split?
> >> > If I'm emitting the instruction directly then the recog pattern for
> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> >>
> >> What rtx does the code that uses the optab pass for operand 0?
> >
> > It gets passed the full comparison:
> >
> > (eq (reg/v:SI 92 [ x ])
> >     (const_int 0 [0]))
> >
> > of which we only look at the operator.
> 
> OK, that's what I thought.  The problem is then the one I mentioned above.
> This rtx doesn't describe the operation that the optab is supposed to
> perform, so it can never be used in the instruction pattern.  (This is different
> from something like cbranch, where operand 0 can be used directly if the
> target supports a very general compare-and-branch instruction.)

So I was wrong before about which RTL it gets passed.  Deep in the expansion
code, the rtl operation

(eq (reg/v:SI 92 [ x ])
      (const_int 0 [0]))

gets broken up and passed piecewise.

First thing it does is explicitly check that the first argument in RTL is an operator:

gcc_assert (insn_operand_matches (icode, 0, test));

and then the jump is emitted by breaking apart the rtl into its operands:

4646      insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
4647                          XEXP (test, 1), label));

And so the operands are:

>>> p debug (operand0)
(reg/v:SI 92 [ xD.4391 ])

>>> p debug (operand1)
(const_int 0 [0])

>>> p debug (operand2)
(code_label 0 0 0 2 (nil) [0 uses])

And targets never get to see the equality check.  If the documentation of the optab is
updated to say that the target operand1 is to be used in a zero_extract with operand0
and compared with 0, then that should be fine, no?  That's the semantics of the optab itself.

Based on that, I don't think we need to split this optab, do we?  Just update the docs to
clarify the zero_extract semantics?

Thanks,
Tamar

> 
> If we want to use a single optab, the code that generates the optab should
> pass something like:
> 
>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
> 
> as operand 0, so that operand 0 specifies the real test condition.
> 
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-22 13:48                   ` Tamar Christina
@ 2022-11-22 14:00                     ` Richard Sandiford
  2022-11-24 12:18                       ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-11-22 14:00 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 11:34 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 11:15 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> -----Original Message-----
>> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
>> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> Shawcroft
>> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >>
>> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> >> -----Original Message-----
>> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> >> Shawcroft
>> >> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >> >>
>> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> >> > Hello,
>> >> >> >> >
>> >> >> >> > Ping and updated patch.
>> >> >> >> >
>> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
>> issues.
>> >> >> >> >
>> >> >> >> > Ok for master?
>> >> >> >> >
>> >> >> >> > Thanks,
>> >> >> >> > Tamar
>> >> >> >> >
>> >> >> >> > gcc/ChangeLog:
>> >> >> >> >
>> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
>> >> >> >> > Rename
>> >> to...
>> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >> >> >         (tbranch<mode>4): New.
>> >> >> >> >
>> >> >> >> > gcc/testsuite/ChangeLog:
>> >> >> >> >
>> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >> >> >
>> >> >> >> > --- inline copy of patch ---
>> >> >> >> >
>> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> >> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >> >> >
>> >> >> >>
>> >> >>
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> >> >> 71
>> >> >> >> > 2bde55c7c72e 100644
>> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >> >> >                       (const_int 1)))]
>> >> >> >> >  )
>> >> >> >> >
>> >> >> >> > -(define_insn "*tb<optab><mode>1"
>> >> >> >> > +(define_expand "tbranch<mode>4"
>> >> >> >> >    [(set (pc) (if_then_else
>> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
>> >> "register_operand"
>> >> >> >> "r")
>> >> >> >> > -                                   (const_int 1)
>> >> >> >> > -                                   (match_operand 1
>> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> >> >> > +                 (match_operand:ALLI 2
>> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> >> >> > +               (label_ref (match_operand 3 "" ""))
>> >> >> >> > +               (pc)))]
>> >> >> >> > +  "optimize > 0"
>> >> >> >>
>> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
>> >> >> >> choice at -O0
>> >> >> too.
>> >> >> >>
>> >> >> >
>> >> >> > Hi,
>> >> >> >
>> >> >> > I had explained the reason why in the original patch, just
>> >> >> > didn't repeat it in
>> >> >> the ping:
>> >> >> >
>> >> >> > Instead of emitting the instruction directly I've chosen to
>> >> >> > expand the pattern using a zero extract and generating the
>> >> >> > existing pattern for comparisons for two
>> >> >> > reasons:
>> >> >> >
>> >> >> >   1. Allows for CSE of the actual comparison.
>> >> >> >   2. It looks like the code in expand makes the label as unused
>> >> >> > and removed
>> >> >> it
>> >> >> >      if it doesn't see a separate reference to it.
>> >> >> >
>> >> >> > Because of this expansion though I disable the pattern at -O0
>> >> >> > since we
>> >> >> have no combine in that case so we'd end up with worse code.  I
>> >> >> did try emitting the pattern directly, but as mentioned in no#2
>> >> >> expand would then kill the label.
>> >> >> >
>> >> >> > Basically I emit the pattern directly, immediately during expand
>> >> >> > the label is
>> >> >> marked as dead for some weird reason.
>> >> >>
>> >> >> Isn't #2 a bug though?  It seems like something we should fix
>> >> >> rather than work around.
>> >> >
>> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
>> >> > split the optabs still? Isn't the problem atm that I need the split?
>> >> > If I'm emitting the instruction directly then the recog pattern for
>> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
>> >>
>> >> What rtx does the code that uses the optab pass for operand 0?
>> >
>> > It gets passed the full comparison:
>> >
>> > (eq (reg/v:SI 92 [ x ])
>> >     (const_int 0 [0]))
>> >
>> > of which we only look at the operator.
>> 
>> OK, that's what I thought.  The problem is then the one I mentioned above.
>> This rtx doesn't describe the operation that the optab is supposed to
>> perform, so it can never be used in the instruction pattern.  (This is different
>> from something like cbranch, where operand 0 can be used directly if the
>> target supports a very general compare-and-branch instruction.)
>
> So I was wrong before about which RTL it gets passed.  Deep in the expansion
> Code the rtl operation 
>
> (eq (reg/v:SI 92 [ x ])
>       (const_int 0 [0]))
>
> Gets broken up and passed piecewise.
>
> First thing it does it explicitly check that the first argument in RTL is an operator:
>
> gcc_assert (insn_operand_matches (icode, 0, test));
>
> and then the jump is emitted by breaking apart the rtl into it's operands:
>
> 4646      insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> 4647                          XEXP (test, 1), label));

Yeah, but the question was what the code that generates the tbranch
optab passes for operand 0 ("test" in the call above).  And like you
said, it's the EQ rtx above, with XEXPs 0 and 1 being passed as operands
1 and 2.  I think the point still stands that that EQ rtx doesn't
describe the correct operation.

> And so the operands are:
>
>>>> p debug (operand0)
> (reg/v:SI 92 [ xD.4391 ])
>
>>>> p debug (operand1)
> (const_int 0 [0])
>
>>>> p debug (operand2)
> (code_label 0 0 0 2 (nil) [0 uses])
>
> And targets never get to see the equality check.

But the .md pattern was:

(define_expand "tbranch<mode>4"
  [(set (pc) (if_then_else
		(match_operator 0 "aarch64_comparison_operator"
		 [(match_operand:ALLI 1 "register_operand")
		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
		(label_ref (match_operand 3 "" ""))
		(pc)))]
  "optimize > 0"
{
  rtx bitvalue = gen_reg_rtx (DImode);
  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
  operands[2] = const0_rtx;
  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
					 operands[2]);
})

where the EQ/NE rtx is passed and matched as operand 0.

> If the documentation of the optab is
> Updated to say that the target operand1 is to be used in a zero_extract with operand0
> and compared with 0 then that should be fine no?  that's the semantic of the optab itself.
>
> Based on that I don't think we need to split this optab do we?  Just update the docs to
> clarify the zero extract semantics?

Well, the point of...

>> If we want to use a single optab, the code that generates the optab should
>> pass something like:
>> 
>>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
>> 
>> as operand 0, so that operand 0 specifies the real test condition.
>> 
>> Thanks,
>> Richard

...was that we should either (a) split the optab or (b) keep the single
optab and pass a "proper" description of the operation as operand 0.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-22 14:00                     ` Richard Sandiford
@ 2022-11-24 12:18                       ` Tamar Christina
  2022-12-01 16:44                         ` Tamar Christina
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-11-24 12:18 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Hi,

I had a question and I figured it'd be easier to ask before I spend more time implementing it 😊

I had noticed that one of the other reasons that cbranch and the other optabs like cmov explicitly
emit the compare separately and use combine to match up the final form is for ifcvt.

In particular by expanding tbranch directly to the final RTL we lose some ifcvt because there are
no patterns that can handle the new zero_extract idiom.

So the three solutions I can think of are:

1. Don't expand tbranch to its final form immediately, but still use zero_extract.  This regresses -O0. (but do we care?)
2. Expand tbranch with vec_extract and provide new zero_extract-based rtl sequences for ifcvt.
     I currently tried this, and while it works, I don't fully trust the RTL.  In particular, unlike say combine,
     ifcvt doesn't allow me to add an extra clobber to say that CC is clobbered by the pattern.  Now tbranch
     itself also expands a clobber, so the RTL isn't wrong even after ifcvt, but I'm worried that the pattern can
     be idiom-recognized and then no clobber could be present.  I could modify the recog code in ifcvt to try to
     ignore clobbers during matching.
3.  I could expand using AND instead of zero_extract.   We have more patterns handling AND, but I'm not
     sure if this will fix the problem entirely; in principle we could expand to what ANDS generates and recog that instead.
    This shouldn't regress -O0 as we wouldn't put a zero_extract explicitly in RTL (and we already have a pattern for ANDS).

What do you think? I personally favor 3.

Thanks,
Tamar

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 22, 2022 2:00 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 11:34 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 11:15 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> -----Original Message-----
> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> Shawcroft
> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >>
> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> >> -----Original Message-----
> >> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> >> Shawcroft
> >> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >> >>
> >> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> >> > Hello,
> >> >> >> >> >
> >> >> >> >> > Ping and updated patch.
> >> >> >> >> >
> >> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
> >> issues.
> >> >> >> >> >
> >> >> >> >> > Ok for master?
> >> >> >> >> >
> >> >> >> >> > Thanks,
> >> >> >> >> > Tamar
> >> >> >> >> >
> >> >> >> >> > gcc/ChangeLog:
> >> >> >> >> >
> >> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
> >> >> >> >> > Rename
> >> >> to...
> >> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >> >> >         (tbranch<mode>4): New.
> >> >> >> >> >
> >> >> >> >> > gcc/testsuite/ChangeLog:
> >> >> >> >> >
> >> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >> >> >
> >> >> >> >> > --- inline copy of patch ---
> >> >> >> >> >
> >> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >> >> >
> >> >> >> >>
> >> >> >>
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> >> >> 71
> >> >> >> >> > 2bde55c7c72e 100644
> >> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >> >> >                       (const_int 1)))]
> >> >> >> >> >  )
> >> >> >> >> >
> >> >> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >> >> >    [(set (pc) (if_then_else
> >> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> >> >> "register_operand"
> >> >> >> >> "r")
> >> >> >> >> > -                                   (const_int 1)
> >> >> >> >> > -                                   (match_operand 1
> >> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> >> >> > +                 (match_operand:ALLI 2
> >> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> >> >> > +               (pc)))]
> >> >> >> >> > +  "optimize > 0"
> >> >> >> >>
> >> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
> >> >> >> >> choice at -O0
> >> >> >> too.
> >> >> >> >>
> >> >> >> >
> >> >> >> > Hi,
> >> >> >> >
> >> >> >> > I had explained the reason why in the original patch, just
> >> >> >> > didn't repeat it in
> >> >> >> the ping:
> >> >> >> >
> >> >> >> > Instead of emitting the instruction directly I've chosen to
> >> >> >> > expand the pattern using a zero extract and generating the
> >> >> >> > existing pattern for comparisons for two
> >> >> >> > reasons:
> >> >> >> >
> >> >> >> >   1. Allows for CSE of the actual comparison.
> >> >> >> >   2. It looks like the code in expand makes the label as
> >> >> >> > unused and removed
> >> >> >> it
> >> >> >> >      if it doesn't see a separate reference to it.
> >> >> >> >
> >> >> >> > Because of this expansion though I disable the pattern at -O0
> >> >> >> > since we
> >> >> >> have no combine in that case so we'd end up with worse code.  I
> >> >> >> did try emitting the pattern directly, but as mentioned in no#2
> >> >> >> expand would then kill the label.
> >> >> >> >
> >> >> >> > Basically I emit the pattern directly, immediately during
> >> >> >> > expand the label is
> >> >> >> marked as dead for some weird reason.
> >> >> >>
> >> >> >> Isn't #2 a bug though?  It seems like something we should fix
> >> >> >> rather than work around.
> >> >> >
> >> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need
> >> >> > to split the optabs still? Isn't the problem atm that I need the split?
> >> >> > If I'm emitting the instruction directly then the recog pattern
> >> >> > for it can just be (eq (vec_extract x 1) 0) which is the correct
> semantics?
> >> >>
> >> >> What rtx does the code that uses the optab pass for operand 0?
> >> >
> >> > It gets passed the full comparison:
> >> >
> >> > (eq (reg/v:SI 92 [ x ])
> >> >     (const_int 0 [0]))
> >> >
> >> > of which we only look at the operator.
> >>
> >> OK, that's what I thought.  The problem is then the one I mentioned
> above.
> >> This rtx doesn't describe the operation that the optab is supposed to
> >> perform, so it can never be used in the instruction pattern.  (This
> >> is different from something like cbranch, where operand 0 can be used
> >> directly if the target supports a very general compare-and-branch
> >> instruction.)
> >
> > So I was wrong before about which RTL it gets passed.  Deep in the
> > expansion Code the rtl operation
> >
> > (eq (reg/v:SI 92 [ x ])
> >       (const_int 0 [0]))
> >
> > Gets broken up and passed piecewise.
> >
> > First thing it does it explicitly check that the first argument in RTL is an
> operator:
> >
> > gcc_assert (insn_operand_matches (icode, 0, test));
> >
> > and then the jump is emitted by breaking apart the rtl into it's operands:
> >
> > 4646      insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> > 4647                          XEXP (test, 1), label));
> 
> Yeah, but the question was what the code that generates the tbranch optab
> passes for operand 0 ("test" in the call above).  And like you said, it's the EQ
> rtx above, with XEXPs 0 and 1 being passed as operands
> 1 and 2.  I think the point still stands that that EQ rtx doesn't describe the
> correct operation.
> 
> > And so the operands are:
> >
> >>>> p debug (operand0)
> > (reg/v:SI 92 [ xD.4391 ])
> >
> >>>> p debug (operand1)
> > (const_int 0 [0])
> >
> >>>> p debug (operand2)
> > (code_label 0 0 0 2 (nil) [0 uses])
> >
> > And targets never get to see the equality check.
> 
> But the .md pattern was:
> 
> (define_expand "tbranch<mode>4"
>   [(set (pc) (if_then_else
> 		(match_operator 0 "aarch64_comparison_operator"
> 		 [(match_operand:ALLI 1 "register_operand")
> 		  (match_operand:ALLI 2
> "aarch64_simd_shift_imm_<ALLI:mode>")])
> 		(label_ref (match_operand 3 "" ""))
> 		(pc)))]
>   "optimize > 0"
> {
>   rtx bitvalue = gen_reg_rtx (DImode);
>   rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
> (operands[1]), 0);
>   emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
>   operands[2] = const0_rtx;
>   operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
> bitvalue,
> 					 operands[2]);
> })
> 
> where the EQ/NE rtx is passed and matched as operand 0.
> 
> > If the documentation of the optab is
> > Updated to say that the target operand1 is to be used in a
> > zero_extract with operand0 and compared with 0 then that should be fine
> no?  that's the semantic of the optab itself.
> >
> > Based on that I don't think we need to split this optab do we?  Just
> > update the docs to clarify the zero extract semantics?
> 
> Well, the point of...
> 
> >> If we want to use a single optab, the code that generates the optab
> >> should pass something like:
> >>
> >>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
> >>
> >> as operand 0, so that operand 0 specifies the real test condition.
> >>
> >> Thanks,
> >> Richard
> 
> ...was that we should either (a) split the optab or (b) keep the single optab
> and pass a "proper" description of the operation as operand 0.
> 
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-11-15  7:33               ` Richard Biener
@ 2022-12-01 16:29                 ` Tamar Christina
  2022-12-02  7:09                   ` Richard Biener
  2022-12-05 12:00                   ` Richard Sandiford
  0 siblings, 2 replies; 33+ messages in thread
From: Tamar Christina @ 2022-12-01 16:29 UTC (permalink / raw)
  To: Richard Biener
  Cc: Richard Biener, Aldy Hernandez, Jeff Law, gcc-patches, nd,
	MacLeod, Andrew

> > +/* Check to see if the supplied comparison in PTEST can be performed as a
> > +   bit-test-and-branch instead.  VAL must contain the original tree
> > +   expression of the non-zero operand which will be used to rewrite the
> > +   comparison in PTEST.
> > +
> > +   Returns TRUE if operation succeeds and returns updated PMODE and
> PTEST,
> > +   else FALSE.  */
> > +
> > +enum insn_code
> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
> > +*pmode) {
> > +  if (!val || TREE_CODE (val) != SSA_NAME)
> > +    return CODE_FOR_nothing;
> > +
> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
> > + *ptest;
> > +
> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> > +    return CODE_FOR_nothing;
> > +
> > +  /* If the target supports the testbit comparison directly, great.
> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
> > + (icode == CODE_FOR_nothing)
> > +    return icode;
> > +
> > +  if (tree_zero_one_valued_p (val))
> > +    {
> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> 
> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
> > word_mode?
> 

It does now. In this particular case all that matters is the bit ordering, so I've changed
it to BITS_BIG_ENDIAN.

Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
optabs apart into two.  The reason is that a match_operator still gets the full RTL.

In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
the name itself to avoid this.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch ---

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
+@item @samp{tbranch_@var{op}@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
+@var{ne}.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
+#include "ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob,
+			  bool test_branch)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
-  gcc_assert (insn_operand_matches (icode, 0, test));
-  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
-                                          XEXP (test, 1), label));
+  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
+  if (test_branch)
+    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
+					    XEXP (test, 1), label));
+  else
+    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
+					    XEXP (test, 1), label));
+
   if (prob.initialized_p ()
       && profile_status_for_fn (cfun) != PROFILE_ABSENT
       && insn
@@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+static enum insn_code
+validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
+{
+  if (!val || TREE_CODE (val) != SSA_NAME)
+    return CODE_FOR_nothing;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+  direct_optab optab;
+
+  if (GET_CODE (test) == EQ)
+    optab = tbranch_eq_optab;
+  else if (GET_CODE (test) == NE)
+    optab = tbranch_ne_optab;
+  else
+    return CODE_FOR_nothing;
+
+  *res = optab;
+
+  /* If the target supports the testbit comparison directly, great.  */
+  auto icode = direct_optab_handler (optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return icode;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return icode;
+    }
+
+  wide_int wcst = get_nonzero_bits (val);
+  if (wcst == -1)
+    return CODE_FOR_nothing;
+
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return CODE_FOR_nothing;
+
+  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return icode;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
@@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  direct_optab optab;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode,
+				   &optab) != CODE_FOR_nothing)
+    {
+      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
+      return;
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
 }
 
-

+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
+OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-11-24 12:18                       ` Tamar Christina
@ 2022-12-01 16:44                         ` Tamar Christina
  2022-12-05 14:06                           ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Tamar Christina @ 2022-12-01 16:44 UTC (permalink / raw)
  To: Tamar Christina, Richard Sandiford
  Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 8807 bytes --]

Hi,

I hadn't received any reply so I had implemented various ways to do this (about 8 of them in fact).

The conclusion is that no, we cannot emit one big RTL for the final instruction immediately.
The reason that all comparisons in the AArch64 backend expand to separate CC compares, and
separate testing of the operands is for ifcvt.

The separate CC compare is needed so ifcvt can produce csel, cset etc from the compares.  Unlike
say combine, ifcvt cannot do recog on a parallel with a clobber.  Should we emit the instruction
directly then ifcvt will not be able to say, make a csel, because we have no patterns which handle
zero_extract and compare. (unlike combine ifcvt cannot transform the extract into an AND).

While you could provide various patterns for this (and I did try) you end up with broken patterns
because you can't add the clobber to the CC register.  If you do, ifcvt recog fails.

i.e.

int
f1 (int x)
{
  if (x & 1)
    return 1;
  return x;
}

We lose csel here.

Secondly the reason the compare with an explicit CC mode is needed is so that ifcvt can transform
the operation into a version that doesn't require the flags to be set.  But it only does so if it knows
the explicit usage of the CC reg.

For instance 

int
foo (int a, int b)
{
  return ((a & (1 << 25)) ? 5 : 4);
}

Doesn't require a comparison, the optimal form is:

foo(int, int):
        ubfx    x0, x0, 25, 1
        add     w0, w0, 4
        ret

and no compare is actually needed.  If you represent the instruction using an ANDS instead of a zero_extract
then you get close, but you end up with an ands followed by an add, which is a slower operation.

These two reasons are the main reasons why all comparisons in AArch64 expand the way they do, so tbranch
Shouldn't do anything differently here.  Additionally the reason for the optab was to pass range information
to the backend during expansion.

In this version however I have represented the expand using an ANDS instead.  This allows us not to regress
on -O0 as the previous version did.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Note that this patch relies on https://patchwork.sourceware.org/project/gcc/patch/Y1+4qItMrQHbdqqD@arm.com/ 
which has yet to be reviewed but which cleans up extensions so they can be used like this.

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch_<code><mode>4): New.
	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): Make dynamic calls with @.
	* config/aarch64/iterators.md(ZEROM, zerom): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch_<code><mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+              (EQL (match_operand:ALLI 0 "register_operand")
+                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
+              (label_ref (match_operand 2 ""))
+              (pc)))]
+  ""
+{
+  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+  rtx reg = gen_reg_rtx (<ZEROM>mode);
+  if (<MODE>mode == <ZEROM>mode)
+    reg = operands[0];
+  else
+    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));
+  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
+  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+  operands[1] = const0_rtx;
+  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+					 operands[1]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
 ;; Give the length suffix letter for a sign- or zero-extension.
 (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
 (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
 
 ;; Give the number of bits in the mode
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	w[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	w[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+} 

[-- Attachment #2: rb16486.patch --]
[-- Type: application/octet-stream, Size: 5714 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch_<code><mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+              (EQL (match_operand:ALLI 0 "register_operand")
+                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
+              (label_ref (match_operand 2 ""))
+              (pc)))]
+  ""
+{
+  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+  rtx reg = gen_reg_rtx (<ZEROM>mode);
+  if (<MODE>mode == <ZEROM>mode)
+    reg = operands[0];
+  else
+    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));
+  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
+  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+  operands[1] = const0_rtx;
+  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+					 operands[1]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
 ;; Give the length suffix letter for a sign- or zero-extension.
 (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
 (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
 
 ;; Give the number of bits in the mode
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	w[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	w[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-12-01 16:29                 ` Tamar Christina
@ 2022-12-02  7:09                   ` Richard Biener
  2022-12-05 12:00                   ` Richard Sandiford
  1 sibling, 0 replies; 33+ messages in thread
From: Richard Biener @ 2022-12-02  7:09 UTC (permalink / raw)
  To: Tamar Christina
  Cc: Richard Biener, Aldy Hernandez, Jeff Law, gcc-patches, nd,
	MacLeod, Andrew

On Thu, 1 Dec 2022, Tamar Christina wrote:

> > > +/* Check to see if the supplied comparison in PTEST can be performed as a
> > > +   bit-test-and-branch instead.  VAL must contain the original tree
> > > +   expression of the non-zero operand which will be used to rewrite the
> > > +   comparison in PTEST.
> > > +
> > > +   Returns TRUE if operation succeeds and returns updated PMODE and
> > PTEST,
> > > +   else FALSE.  */
> > > +
> > > +enum insn_code
> > > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
> > > +*pmode) {
> > > +  if (!val || TREE_CODE (val) != SSA_NAME)
> > > +    return CODE_FOR_nothing;
> > > +
> > > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
> > > + *ptest;
> > > +
> > > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> > > +    return CODE_FOR_nothing;
> > > +
> > > +  /* If the target supports the testbit comparison directly, great.
> > > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
> > > + (icode == CODE_FOR_nothing)
> > > +    return icode;
> > > +
> > > +  if (tree_zero_one_valued_p (val))
> > > +    {
> > > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> > 
> > Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
> > > word_mode?
> > 
> 
> It does now. In this particular case all that matters is the bit ordering, so I've changed
> it to BITS_BIG_ENDIAN.

It looks like this would fit indeed.

> Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
> optabs apart into two.  The reason is that a match_operator still gets the full RTL.
> 
> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
> the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
> the name itself to avoid this.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK if Richard doesn't have any further comments.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns): New overload taking a tree value.
> 	* tree.h (tree_zero_one_valued_p): New.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>  
> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
> +@item @samp{tbranch_@var{op}@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
> +@var{ne}.
> +
>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>  			 profile_probability);
>  
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +				     machine_mode, rtx, rtx_code_label *,
> +				     rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>  				     machine_mode, rtx, rtx_code_label *,
>  				     rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>  	}
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -			       GET_MODE (temp), NULL_RTX,
> +			       exp, GET_MODE (temp), NULL_RTX,
>  			       if_false_label, if_true_label, prob);
>      }
>  
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -			       word_mode, NULL_RTX, NULL, if_true_label,
> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>  			       prob);
>  
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  	break;
>  
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -			       NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +			       word_mode, NULL_RTX, NULL, if_false_label,
>  			       prob.invert ());
>      }
>  
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>  			       NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +			     operand_subword_force (op1, i, mode),
> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  			 rtx_code_label *if_false_label,
>  			 rtx_code_label *if_true_label,
>  			 profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +			  if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +			 tree val, machine_mode mode, rtx size,
> +			 rtx_code_label *if_false_label,
> +			 rtx_code_label *if_true_label,
> +			 profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  		    }
>  		  else
>  		    dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, dest_label, NULL, first_prob);
> +
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, dest_label, NULL,
> +					   first_prob);
>  		}
>  	      /* For !and_them we want to split:
>  		 if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>  		{
>  		  profile_probability first_prob = prob.split (cprob);
> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, NULL, if_true_label, first_prob);
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, NULL,
> +					   if_true_label, first_prob);
>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>  		    {
>  		      /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  	    }
>  	}
>  
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>  			       if_true_label, prob);
>      }
>  
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>  
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +			   ((mode == BLKmode)
> +			    ? expr_size (treeop0) : NULL_RTX),
>  			   if_false_label, if_true_label, prob);
>  }
>  
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>  
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>  				   machine_mode *);
> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>  
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -			  profile_probability prob)
> +			  direct_optab cmp_optab, profile_probability prob,
> +			  bool test_branch)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>  
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>  
>    gcc_assert (icode != CODE_FOR_nothing);
> -  gcc_assert (insn_operand_matches (icode, 0, test));
> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> -                                          XEXP (test, 1), label));
> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
> +  if (test_branch)
> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +  else
> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +
>    if (prob.initialized_p ()
>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>        && insn
> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>  
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */
> +
> +static enum insn_code
> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +  direct_optab optab;
> +
> +  if (GET_CODE (test) == EQ)
> +    optab = tbranch_eq_optab;
> +  else if (GET_CODE (test) == NE)
> +    optab = tbranch_ne_optab;
> +  else
> +    return CODE_FOR_nothing;
> +
> +  *res = optab;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>     
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>  
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -			 machine_mode mode, int unsignedp, rtx label,
> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>  
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>  		    &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  direct_optab optab;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode,
> +				   &optab) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>  }
>  
> -
> 
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +			 machine_mode mode, int unsignedp, rtx label,
> +			 profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +			   label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>  
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>  				     machine_mode, int, rtx,
>  				     profile_probability prob
>  					= profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +				     machine_mode, int, tree, rtx,
> +				     profile_probability prob
> +					= profile_probability::uninitialized ());
>  
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-12-01 16:29                 ` Tamar Christina
  2022-12-02  7:09                   ` Richard Biener
@ 2022-12-05 12:00                   ` Richard Sandiford
  2022-12-05 13:14                     ` Richard Sandiford
  1 sibling, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2022-12-05 12:00 UTC (permalink / raw)
  To: Tamar Christina via Gcc-patches
  Cc: Richard Biener, Tamar Christina, Richard Biener, Aldy Hernandez,
	Jeff Law, nd, MacLeod, Andrew

Tamar Christina via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> > +/* Check to see if the supplied comparison in PTEST can be performed as a
>> > +   bit-test-and-branch instead.  VAL must contain the original tree
>> > +   expression of the non-zero operand which will be used to rewrite the
>> > +   comparison in PTEST.
>> > +
>> > +   Returns TRUE if operation succeeds and returns updated PMODE and
>> PTEST,
>> > +   else FALSE.  */
>> > +
>> > +enum insn_code
>> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
>> > +*pmode) {
>> > +  if (!val || TREE_CODE (val) != SSA_NAME)
>> > +    return CODE_FOR_nothing;
>> > +
>> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
>> > + *ptest;
>> > +
>> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
>> > +    return CODE_FOR_nothing;
>> > +
>> > +  /* If the target supports the testbit comparison directly, great.
>> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
>> > + (icode == CODE_FOR_nothing)
>> > +    return icode;
>> > +
>> > +  if (tree_zero_one_valued_p (val))
>> > +    {
>> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>> 
>> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
>> > word_mode?
>> 
>
> It does now. In this particular case all that matters is the bit ordering, so I've changed
> it to BITS_BIG_ENDIAN.
>
> Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
> optabs apart into two.  The reason is that a match_operator still gets the full RTL.
>
> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
> the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
> the name itself to avoid this.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns): New overload.
> 	* tree.h (tree_zero_one_valued_p): New.

Thanks for doing this.

> --- inline copy of patch ---
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>  
> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
> +@item @samp{tbranch_@var{op}@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
> +@var{ne}.
> +

The documentation still describes the old interface.  Also, there are only 3
operands now, rather than 4, so the optab name should end with 3.

>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>  			 profile_probability);
>  
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +				     machine_mode, rtx, rtx_code_label *,
> +				     rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>  				     machine_mode, rtx, rtx_code_label *,
>  				     rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>  	}
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -			       GET_MODE (temp), NULL_RTX,
> +			       exp, GET_MODE (temp), NULL_RTX,
>  			       if_false_label, if_true_label, prob);
>      }
>  
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -			       word_mode, NULL_RTX, NULL, if_true_label,
> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>  			       prob);
>  
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  	break;
>  
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -			       NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +			       word_mode, NULL_RTX, NULL, if_false_label,
>  			       prob.invert ());
>      }
>  
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>  			       NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +			     operand_subword_force (op1, i, mode),
> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  			 rtx_code_label *if_false_label,
>  			 rtx_code_label *if_true_label,
>  			 profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +			  if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +			 tree val, machine_mode mode, rtx size,
> +			 rtx_code_label *if_false_label,
> +			 rtx_code_label *if_true_label,
> +			 profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  		    }
>  		  else
>  		    dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, dest_label, NULL, first_prob);
> +
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, dest_label, NULL,
> +					   first_prob);
>  		}
>  	      /* For !and_them we want to split:
>  		 if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>  		{
>  		  profile_probability first_prob = prob.split (cprob);
> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, NULL, if_true_label, first_prob);
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, NULL,
> +					   if_true_label, first_prob);
>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>  		    {
>  		      /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  	    }
>  	}
>  
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>  			       if_true_label, prob);
>      }
>  
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>  
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +			   ((mode == BLKmode)
> +			    ? expr_size (treeop0) : NULL_RTX),
>  			   if_false_label, if_true_label, prob);
>  }
>  
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>  
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>  				   machine_mode *);
> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>  
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -			  profile_probability prob)
> +			  direct_optab cmp_optab, profile_probability prob,
> +			  bool test_branch)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>  
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>  
>    gcc_assert (icode != CODE_FOR_nothing);
> -  gcc_assert (insn_operand_matches (icode, 0, test));
> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> -                                          XEXP (test, 1), label));
> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
> +  if (test_branch)
> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +  else
> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +
>    if (prob.initialized_p ()
>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>        && insn
> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>  
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */

The function now returns an icode rather than true/false.  I think it'd
also be good to clarify what *PTEST means for the tbranch case.  How about:

/* PTEST points to a comparison that compares its first operand with zero.
   Check to see if it can be performed as a bit-test-and-branch instead.
   On success, return the instruction that performs the bit-and-test-and-branch
   and replace the second operand of *PTEST with the bit number to test.
   On failure, return CODE_FOR_nothing and leave *PTEST unchanged.

   Note that the comparison described by *PTEST should not be taken
   literally after a successful return.  *PTEST is just a convenient
   place to store the two operands of the bit-and-test.

   VAL must contain the original tree expression for the first operand
   of *PTEST.  */

Looks good to me otherwise.

Thanks,
Richard

> +static enum insn_code
> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +  direct_optab optab;
> +
> +  if (GET_CODE (test) == EQ)
> +    optab = tbranch_eq_optab;
> +  else if (GET_CODE (test) == NE)
> +    optab = tbranch_ne_optab;
> +  else
> +    return CODE_FOR_nothing;
> +
> +  *res = optab;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>     
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>  
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -			 machine_mode mode, int unsignedp, rtx label,
> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>  
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>  		    &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  direct_optab optab;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode,
> +				   &optab) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>  }
>  
> -
>
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +			 machine_mode mode, int unsignedp, rtx label,
> +			 profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +			   label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>  
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>  				     machine_mode, int, rtx,
>  				     profile_probability prob
>  					= profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +				     machine_mode, int, tree, rtx,
> +				     profile_probability prob
> +					= profile_probability::uninitialized ());
>  
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations
  2022-12-05 12:00                   ` Richard Sandiford
@ 2022-12-05 13:14                     ` Richard Sandiford
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Sandiford @ 2022-12-05 13:14 UTC (permalink / raw)
  To: Tamar Christina via Gcc-patches
  Cc: Richard Biener, Tamar Christina, Richard Biener, Aldy Hernandez,
	Jeff Law, nd, MacLeod, Andrew

Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Tamar Christina via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>> > +/* Check to see if the supplied comparison in PTEST can be performed as a
>>> > +   bit-test-and-branch instead.  VAL must contain the original tree
>>> > +   expression of the non-zero operand which will be used to rewrite the
>>> > +   comparison in PTEST.
>>> > +
>>> > +   Returns TRUE if operation succeeds and returns updated PMODE and
>>> PTEST,
>>> > +   else FALSE.  */
>>> > +
>>> > +enum insn_code
>>> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
>>> > +*pmode) {
>>> > +  if (!val || TREE_CODE (val) != SSA_NAME)
>>> > +    return CODE_FOR_nothing;
>>> > +
>>> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
>>> > + *ptest;
>>> > +
>>> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
>>> > +    return CODE_FOR_nothing;
>>> > +
>>> > +  /* If the target supports the testbit comparison directly, great.
>>> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
>>> > + (icode == CODE_FOR_nothing)
>>> > +    return icode;
>>> > +
>>> > +  if (tree_zero_one_valued_p (val))
>>> > +    {
>>> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>>> 
>>> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
>>> > word_mode?
>>> 
>>
>> It does now. In this particular case all that matters is the bit ordering, so I've changed
>> It to BITS_BIG_ENDIAN.
>>
>> Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
>> optabs apart into two.  The reason is that a match_operator still gets the full RTL.
>>
>> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
>> the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
>> the name itself to avoid this.
>>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>>
>> Ok for master?
>>
>> Thanks,
>> Tamar
>>
>> gcc/ChangeLog:
>>
>> 	* dojump.cc (do_jump): Pass along value.
>> 	(do_jump_by_parts_greater_rtx): Likewise.
>> 	(do_jump_by_parts_zero_rtx): Likewise.
>> 	(do_jump_by_parts_equality_rtx): Likewise.
>> 	(do_compare_rtx_and_jump): Likewise.
>> 	(do_compare_and_jump): Likewise.
>> 	* dojump.h (do_compare_rtx_and_jump): New.
>> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
>> 	(validate_test_and_branch): New.
>> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
>> 	supplied then check if it's suitable for tbranch.
>> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
>> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
>> 	* optabs.h (emit_cmp_and_jump_insns): New overload.
>> 	* tree.h (tree_zero_one_valued_p): New.
>
> Thanks for doing this.
>
>> --- inline copy of patch ---
>>
>> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
>> --- a/gcc/doc/md.texi
>> +++ b/gcc/doc/md.texi
>> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>>  from the machine description.
>>  
>> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
>> +@item @samp{tbranch_@var{op}@var{mode}4}
>> +Conditional branch instruction combined with a bit test-and-compare
>> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
>> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
>> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
>> +@var{ne}.
>> +
>
> The documentation still describes the old interface.  Also, there are only 3
> operands now, rather than 4, so the optab name should end with 3.
>
>>  @cindex @code{cbranch@var{mode}4} instruction pattern
>>  @item @samp{cbranch@var{mode}4}
>>  Conditional branch instruction combined with a compare instruction.
>> diff --git a/gcc/dojump.h b/gcc/dojump.h
>> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
>> --- a/gcc/dojump.h
>> +++ b/gcc/dojump.h
>> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>>  			 profile_probability);
>>  
>> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
>> +				     machine_mode, rtx, rtx_code_label *,
>> +				     rtx_code_label *, profile_probability);
>> +
>>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>>  				     machine_mode, rtx, rtx_code_label *,
>>  				     rtx_code_label *, profile_probability);
>> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
>> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
>> --- a/gcc/dojump.cc
>> +++ b/gcc/dojump.cc
>> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>>  	}
>>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
>> -			       GET_MODE (temp), NULL_RTX,
>> +			       exp, GET_MODE (temp), NULL_RTX,
>>  			       if_false_label, if_true_label, prob);
>>      }
>>  
>> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>>  
>>        /* All but high-order word must be compared as unsigned.  */
>>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
>> -			       word_mode, NULL_RTX, NULL, if_true_label,
>> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>>  			       prob);
>>  
>>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
>> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>>  	break;
>>  
>>        /* Consider lower words only if these are equal.  */
>> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
>> -			       NULL_RTX, NULL, if_false_label,
>> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
>> +			       word_mode, NULL_RTX, NULL, if_false_label,
>>  			       prob.invert ());
>>      }
>>  
>> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>>  
>>    if (part != 0)
>>      {
>> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
>> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>>  			       NULL_RTX, if_false_label, if_true_label, prob);
>>        return;
>>      }
>> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>>  
>>    for (i = 0; i < nwords; i++)
>>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
>> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
>> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>>  			     if_false_label, NULL, prob);
>>  
>>    if (if_true_label)
>> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>>  
>>    for (i = 0; i < nwords; i++)
>>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
>> -                             operand_subword_force (op1, i, mode),
>> -                             EQ, 0, word_mode, NULL_RTX,
>> +			     operand_subword_force (op1, i, mode),
>> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>>  			     if_false_label, NULL, prob);
>>  
>>    if (if_true_label)
>> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  			 rtx_code_label *if_false_label,
>>  			 rtx_code_label *if_true_label,
>>  			 profile_probability prob)
>> +{
>> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
>> +			  if_false_label, if_true_label, prob);
>> +}
>> +
>> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
>> +   The decision as to signed or unsigned comparison must be made by the caller.
>> +
>> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
>> +   compared.  */
>> +
>> +void
>> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>> +			 tree val, machine_mode mode, rtx size,
>> +			 rtx_code_label *if_false_label,
>> +			 rtx_code_label *if_true_label,
>> +			 profile_probability prob)
>>  {
>>    rtx tem;
>>    rtx_code_label *dummy_label = NULL;
>> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  		    }
>>  		  else
>>  		    dest_label = if_false_label;
>> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
>> -					   size, dest_label, NULL, first_prob);
>> +
>> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
>> +					   val, mode, size, dest_label, NULL,
>> +					   first_prob);
>>  		}
>>  	      /* For !and_them we want to split:
>>  		 if (x) goto t; // prob;
>> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>                else
>>  		{
>>  		  profile_probability first_prob = prob.split (cprob);
>> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
>> -					   size, NULL, if_true_label, first_prob);
>> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
>> +					   val, mode, size, NULL,
>> +					   if_true_label, first_prob);
>>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>>  		    {
>>  		      /* x != y can be split into x unord y || x ltgt y
>> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  	    }
>>  	}
>>  
>> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
>> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>>  			       if_true_label, prob);
>>      }
>>  
>> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>>        op1 = new_op1;
>>      }
>>  
>> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
>> -                           ((mode == BLKmode)
>> -                            ? expr_size (treeop0) : NULL_RTX),
>> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
>> +			   ((mode == BLKmode)
>> +			    ? expr_size (treeop0) : NULL_RTX),
>>  			   if_false_label, if_true_label, prob);
>>  }
>>  
>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
>> --- a/gcc/optabs.cc
>> +++ b/gcc/optabs.cc
>> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "libfuncs.h"
>>  #include "internal-fn.h"
>>  #include "langhooks.h"
>> +#include "gimple.h"
>> +#include "ssa.h"
>>  
>>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>>  				   machine_mode *);
>> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>>  
>>  static void
>>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>> -			  profile_probability prob)
>> +			  direct_optab cmp_optab, profile_probability prob,
>> +			  bool test_branch)
>>  {
>>    machine_mode optab_mode;
>>    enum mode_class mclass;
>> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>  
>>    mclass = GET_MODE_CLASS (mode);
>>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
>> -  icode = optab_handler (cbranch_optab, optab_mode);
>> +  icode = optab_handler (cmp_optab, optab_mode);
>>  
>>    gcc_assert (icode != CODE_FOR_nothing);
>> -  gcc_assert (insn_operand_matches (icode, 0, test));
>> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
>> -                                          XEXP (test, 1), label));
>> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
>> +  if (test_branch)
>> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
>> +					    XEXP (test, 1), label));
>> +  else
>> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
>> +					    XEXP (test, 1), label));
>> +
>>    if (prob.initialized_p ()
>>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>>        && insn
>> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>      add_reg_br_prob_note (insn, prob);
>>  }
>>  
>> +/* Check to see if the supplied comparison in PTEST can be performed as a
>> +   bit-test-and-branch instead.  VAL must contain the original tree
>> +   expression of the non-zero operand which will be used to rewrite the
>> +   comparison in PTEST.
>> +
>> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
>> +   else FALSE.  */
>
> The function now returns an icode rather than true/false.  I think it'd
> also be good to clarify what *PTEST means for the tbranch case.  How about:
>
> /* PTEST points to a comparison that compares its first operand with zero.
>    Check to see if it can be performed as a bit-test-and-branch instead.
>    On success, return the instruction that performs the bit-and-test-and-branch

(bit-test-and-branch)

>    and replace the second operand of *PTEST with the bit number to test.
>    On failure, return CODE_FOR_nothing and leave *PTEST unchanged.
>
>    Note that the comparison described by *PTEST should not be taken
>    literally after a successful return.  *PTEST is just a convenient
>    place to store the two operands of the bit-and-test.
>
>    VAL must contain the original tree expression for the first operand
>    of *PTEST.  */
>
> Looks good to me otherwise.
>
> Thanks,
> Richard
>
>> +static enum insn_code
>> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
>> +{
>> +  if (!val || TREE_CODE (val) != SSA_NAME)
>> +    return CODE_FOR_nothing;
>> +
>> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
>> +  rtx test = *ptest;
>> +  direct_optab optab;
>> +
>> +  if (GET_CODE (test) == EQ)
>> +    optab = tbranch_eq_optab;
>> +  else if (GET_CODE (test) == NE)
>> +    optab = tbranch_ne_optab;
>> +  else
>> +    return CODE_FOR_nothing;
>> +
>> +  *res = optab;
>> +
>> +  /* If the target supports the testbit comparison directly, great.  */
>> +  auto icode = direct_optab_handler (optab, mode);
>> +  if (icode == CODE_FOR_nothing)
>> +    return icode;
>> +
>> +  if (tree_zero_one_valued_p (val))
>> +    {
>> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>> +      XEXP (test, 1) = gen_int_mode (pos, mode);
>> +      *ptest = test;
>> +      *pmode = mode;
>> +      return icode;
>> +    }
>> +
>> +  wide_int wcst = get_nonzero_bits (val);
>> +  if (wcst == -1)
>> +    return CODE_FOR_nothing;
>> +
>> +  int bitpos;
>> +
>> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
>> +    return CODE_FOR_nothing;
>> +
>> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
>> +  XEXP (test, 1) = gen_int_mode (pos, mode);
>> +  *ptest = test;
>> +  *pmode = mode;
>> +  return icode;
>> +}
>> +
>>  /* Generate code to compare X with Y so that the condition codes are
>>     set and to jump to LABEL if the condition is true.  If X is a
>>     constant and Y is not a constant, then the comparison is swapped to
>> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>     It will be potentially converted into an unsigned variant based on
>>     UNSIGNEDP to select a proper jump instruction.
>>     
>> -   PROB is the probability of jumping to LABEL.  */
>> +   PROB is the probability of jumping to LABEL.  If the comparison is against
>> +   zero then VAL contains the expression from which the non-zero RTL is
>> +   derived.  */
>>  
>>  void
>>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>> -			 machine_mode mode, int unsignedp, rtx label,
>> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>>                           profile_probability prob)
>>  {
>>    rtx op0 = x, op1 = y;
>> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>>  
>>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>>  		    &test, &mode);
>> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
>> +
>> +  /* Check if we're comparing a truth type with 0, and if so check if
>> +     the target supports tbranch.  */
>> +  machine_mode tmode = mode;
>> +  direct_optab optab;
>> +  if (op1 == CONST0_RTX (GET_MODE (op1))
>> +      && validate_test_and_branch (val, &test, &tmode,
>> +				   &optab) != CODE_FOR_nothing)
>> +    {
>> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
>> +      return;
>> +    }
>> +
>> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>>  }
>>  
>> -
>>
>> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
>> +
>> +void
>> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>> +			 machine_mode mode, int unsignedp, rtx label,
>> +			 profile_probability prob)
>> +{
>> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
>> +			   label, prob);
>> +}
>> +
>> +
>>  /* Emit a library call comparison between floating point X and Y.
>>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>>  
>> diff --git a/gcc/optabs.def b/gcc/optabs.def
>> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
>> --- a/gcc/optabs.def
>> +++ b/gcc/optabs.def
>> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>>  OPTAB_D (reload_out_optab, "reload_out$a")
>>  
>>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
>> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
>> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>>  OPTAB_D (addcc_optab, "add$acc")
>>  OPTAB_D (negcc_optab, "neg$acc")
>>  OPTAB_D (notcc_optab, "not$acc")
>> diff --git a/gcc/optabs.h b/gcc/optabs.h
>> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
>> --- a/gcc/optabs.h
>> +++ b/gcc/optabs.h
>> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>>  				     machine_mode, int, rtx,
>>  				     profile_probability prob
>>  					= profile_probability::uninitialized ());
>> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>> +				     machine_mode, int, tree, rtx,
>> +				     profile_probability prob
>> +					= profile_probability::uninitialized ());
>>  
>>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>>  extern void emit_indirect_jump (rtx);
>> diff --git a/gcc/tree.h b/gcc/tree.h
>> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
>> --- a/gcc/tree.h
>> +++ b/gcc/tree.h
>> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>>  extern tree signed_type_for (tree);
>>  extern tree unsigned_type_for (tree);
>>  extern bool is_truth_type_for (tree, tree);
>> +extern bool tree_zero_one_valued_p (tree);
>>  extern tree truth_type_for (tree);
>>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>>  extern tree build_pointer_type (tree);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/2]AArch64 Support new tbranch optab.
  2022-12-01 16:44                         ` Tamar Christina
@ 2022-12-05 14:06                           ` Richard Sandiford
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Sandiford @ 2022-12-05 14:06 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, Richard Earnshaw, nd, Marcus Shawcroft

Tamar Christina <Tamar.Christina@arm.com> writes:
> Hi,
>
> I hadn't received any reply so I had implemented various ways to do this (about 8 of them in fact).
>
> The conclusion is that no, we cannot emit one big RTL for the final instruction immediately.
> The reason that all comparisons in the AArch64 backend expand to separate CC compares, and
> separate testing of the operands is for ifcvt.
>
> The separate CC compare is needed so ifcvt can produce csel, cset etc from the compares.  Unlike
> say combine, ifcvt can not do recog on a parallel with a clobber.  Should we emit the instruction
> directly then ifcvt will not be able to say, make a csel, because we have no patterns which handle
> zero_extract and compare. (unlike combine ifcvt cannot transform the extract into an AND).
>
> While you could provide various patterns for this (and I did try) you end up with broken patterns
> because you can't add the clobber to the CC register.  If you do, ifcvt recog fails.
>
> i.e.
>
> int
> f1 (int x)
> {
>   if (x & 1)
>     return 1;
>   return x;
> }
>
> We lose csel here.
>
> Secondly the reason the compare with an explicit CC mode is needed is so that ifcvt can transform
> the operation into a version that doesn't require the flags to be set.  But it only does so if it know
> the explicit usage of the CC reg.
>
> For instance 
>
> int
> foo (int a, int b)
> {
>   return ((a & (1 << 25)) ? 5 : 4);
> }
>
> Doesn't require a comparison, the optimal form is:
>
> foo(int, int):
>         ubfx    x0, x0, 25, 1
>         add     w0, w0, 4
>         ret
>
> and no compare is actually needed.  If you represent the instruction using an ANDS instead of a zero_extract
> then you get close, but you end up with an ands followed by an add, which is a slower operation.
>
> These two reasons are the main reasons why all comparisons in AArch64 expand the way they do, so tbranch
> shouldn't do anything differently here.

Thanks for the (useful) investigation.  Makes sense.

> Additionally the reason for the optab was to pass range information
> to the backend during expansion.

Yeah.  But I think the fundamental reason that AArch64 defines the
optab is still that it has native support for the associated operation
(which is a good thing, an honest reason).  The fact that we split it
apart for if-conversion---in a different form from normal comparisons---
is an implementation detail.  So it still seems like a proper optab,
rather than a crutch to convey tree info.

> In this version however I have represented the expand using an ANDS instead.  This allows us not to regress
> on -O0 as the previous version did.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Note that this patch relies on https://patchwork.sourceware.org/project/gcc/patch/Y1+4qItMrQHbdqqD@arm.com/ 
> which has yet to be reviewed but which cleans up extensions so they can be used like this.
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> 	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> 	(tbranch_<code><mode>4): New.
> 	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): Make dynamic calls with @.
> 	* config/aarch64/iterators.md(ZEROM, zerom): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/tbz_1.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
>  		      (const_int 1)))]
>  )
>  
> -(define_insn "*tb<optab><mode>1"
> +(define_expand "tbranch_<code><mode>4"
>    [(set (pc) (if_then_else
> -	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
> -				    (const_int 1)
> -				    (match_operand 1
> -				      "aarch64_simd_shift_imm_<mode>" "n"))
> +              (EQL (match_operand:ALLI 0 "register_operand")
> +                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
> +              (label_ref (match_operand 2 ""))
> +              (pc)))]
> +  ""
> +{
> +  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
> +  rtx reg = gen_reg_rtx (<ZEROM>mode);
> +  if (<MODE>mode == <ZEROM>mode)
> +    reg = operands[0];
> +  else
> +    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));

I think the last five lines should just be:

  rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);

using paradoxical subregs for the QI and HI cases.  Using subregs should
generate better code, since the temporary runs the risk of having the
same value live in two different pseudos at the same time (one pseudo
with the original mode, one pseudo with the extended mode).

OK with that change and without the changes to the zero_extend pattern names.

Thanks,
Richard

> +  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
> +  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
> +  operands[1] = const0_rtx;
> +  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
> +					 operands[1]);
> +})
> +
> +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> +  [(set (pc) (if_then_else
> +	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
> +				     (const_int 1)
> +				     (match_operand 1
> +				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>  		   (const_int 0))
>  	     (label_ref (match_operand 2 "" ""))
>  	     (pc)))
> @@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
>        {
>  	if (get_attr_far_branch (insn) == 1)
>  	  return aarch64_gen_far_branch (operands, 2, "Ltb",
> -					 "<inv_tb>\\t%<w>0, %1, ");
> +					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
>  	else
>  	  {
>  	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
> -	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
> +	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>  	  }
>        }
>      else
> -      return "<tbz>\t%<w>0, %1, %l2";
> +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>    }
>    [(set_attr "type" "branch")
>     (set (attr "length")
> @@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp")]
>  )
>  
> -(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
> @@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp,fp,fp,fp")]
>  )
>  
> -(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
> @@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
>  )
>  
> -(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
>  ;; Give the length suffix letter for a sign- or zero-extension.
>  (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
>  (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
> +(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
> +(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
>  
>  ;; Give the number of bits in the mode
>  (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> @@ -0,0 +1,95 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdbool.h>
> +
> +void h(void);
> +
> +/*
> +** g1:
> +** 	tbnz	w[0-9]+, #?0, .L([0-9]+)
> +** 	ret
> +**	...
> +*/
> +void g1(bool x)
> +{
> +  if (__builtin_expect (x, 0))
> +    h ();
> +}
> +
> +/*
> +** g2:
> +** 	tbz	w[0-9]+, #?0, .L([0-9]+)
> +** 	b	h
> +**	...
> +*/
> +void g2(bool x)
> +{
> +  if (__builtin_expect (x, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_ge:
> +** 	tbnz	w[0-9]+, #?31, .L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_ge(int x)
> +{
> +  if (__builtin_expect (x >= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_gt:
> +** 	cmp	w[0-9]+, 0
> +** 	ble	.L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_gt(int x)
> +{
> +  if (__builtin_expect (x > 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_lt:
> +** 	tbz	w[0-9]+, #?31, .L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_lt(int x)
> +{
> +  if (__builtin_expect (x < 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_le:
> +** 	cmp	w[0-9]+, 0
> +** 	bgt	.L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_le(int x)
> +{
> +  if (__builtin_expect (x <= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g5:
> +** 	mov	w[0-9]+, 65279
> +** 	tst	w[0-9]+, w[0-9]+
> +** 	beq	.L[0-9]+
> +** 	b	h
> +**	...
> +*/ 
> +void g5(int x)
> +{
> +  if (__builtin_expect (x & 0xfeff, 1))
> +    h ();
> +} 

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2022-12-05 14:06 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-31 11:53 [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Tamar Christina
2022-10-31 11:53 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
2022-11-14 15:58   ` Tamar Christina
2022-11-15 10:36     ` Richard Sandiford
2022-11-15 10:42       ` Tamar Christina
2022-11-15 10:50         ` Richard Sandiford
2022-11-15 11:00           ` Tamar Christina
2022-11-15 11:14             ` Richard Sandiford
2022-11-15 11:23               ` Tamar Christina
2022-11-15 11:33                 ` Richard Sandiford
2022-11-15 11:39                   ` Tamar Christina
2022-11-22 13:48                   ` Tamar Christina
2022-11-22 14:00                     ` Richard Sandiford
2022-11-24 12:18                       ` Tamar Christina
2022-12-01 16:44                         ` Tamar Christina
2022-12-05 14:06                           ` Richard Sandiford
2022-10-31 11:54 ` [PATCH]AArch64 Extend umov and sbfx patterns Tamar Christina
2022-10-31 12:26   ` Richard Sandiford
2022-11-11 14:42     ` Tamar Christina
2022-11-15 11:10       ` Richard Sandiford
2022-10-31 21:16 ` [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Jeff Law
2022-11-01 15:53   ` Tamar Christina
2022-11-01 17:00     ` Jeff Law
2022-11-02  9:55       ` Tamar Christina
2022-11-02 11:08         ` Aldy Hernandez
2022-11-05 14:23           ` Richard Biener
2022-11-14 15:56             ` Tamar Christina
2022-11-14 16:22               ` Jeff Law
2022-11-15  7:33               ` Richard Biener
2022-12-01 16:29                 ` Tamar Christina
2022-12-02  7:09                   ` Richard Biener
2022-12-05 12:00                   ` Richard Sandiford
2022-12-05 13:14                     ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).