> -----Original Message----- > From: Richard Biener > Sent: Saturday, November 5, 2022 2:23 PM > To: Aldy Hernandez > Cc: Tamar Christina ; Jeff Law > ; gcc-patches@gcc.gnu.org; nd ; > MacLeod, Andrew > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support > for bit-test-and-branch operations > > On Wed, 2 Nov 2022, Aldy Hernandez wrote: > > > On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina > wrote: > > > > > > Hi Aldy, > > > > > > I'm trying to use Ranger to determine if a range of an expression is a > single bit. > > > > > > If possible in case of a mask then also the position of the bit that's being > checked by the mask (or the mask itself). > > > > Just instantiate a ranger, and ask for the range of an SSA name (or an > > arbitrary tree expression) at a particular gimple statement (or an > > edge): > > > > gimple_ranger ranger; > > int_range_max r; > > if (ranger.range_of_expr (r, EXPR, STMT)) { > > // do stuff with range "r" > > if (r.singleton_p ()) { > > wide_int num = r.lower_bound (); > > // Check the bits in NUM, etc... > > } > > } > > > > You can see the full ranger API in gimple-range.h. > > > > Note that instantiating a new ranger is relatively lightweight, but > > it's not free. So unless you're calling range_of_expr sporadically, > > you probably want to have one instance for your pass. You can pass > > the gimple_ranger around your pass. Another way of doing this > > is calling enable_ranger() at pass start, and then doing: > > > > get_range_query (cfun)->range_of_expr (r, EXPR, STMT); > > > > gimple-loop-versioning.cc has an example of using enable_ranger / > > disable_ranger. > > > > I am assuming you are interested in ranges for integers / pointers. > > Otherwise (floats, etc) you'd have to use "Value_Range" instead of > > int_range_max. I can give you examples on that if necessary. > > > > Let me know if that helps. It did! 
I ended up going with Richi's suggestion, but the snippet was very helpful for a different range-based patch I'm trying a prototype for. Many thanks for the example! > > I think you maybe just want get_nonzero_bits? Ah, looks like that uses range info as well. Thanks! Ok for master? Thanks, Tamar gcc/ChangeLog: * dojump.cc (do_jump): Pass along value. (do_jump_by_parts_greater_rtx): Likewise. (do_jump_by_parts_zero_rtx): Likewise. (do_jump_by_parts_equality_rtx): Likewise. (do_compare_rtx_and_jump): Likewise. (do_compare_and_jump): Likewise. * dojump.h (do_compare_rtx_and_jump): New. * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check. (validate_test_and_branch): New. (emit_cmp_and_jump_insns): Optionally take a value, and when value is supplied then check if it's suitable for tbranch. * optabs.def (tbranch$a4): New. * doc/md.texi (tbranch@var{mode}4): Document it. * optabs.h (emit_cmp_and_jump_insns): New overload. * tree.h (tree_zero_one_valued_p): New. --- inline copy of patch --- diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether from the machine description. +@cindex @code{tbranch@var{mode}4} instruction pattern +@item @samp{tbranch@var{mode}4} +Conditional branch instruction combined with a bit test-and-compare +instruction. Operand 0 is a comparison operator. Operand 1 is the +operand of the comparison. Operand 2 is the bit position of Operand 1 to test. +Operand 3 is the @code{code_label} to jump to. + @cindex @code{cbranch@var{mode}4} instruction pattern @item @samp{cbranch@var{mode}4} Conditional branch instruction combined with a compare instruction. 
diff --git a/gcc/dojump.h b/gcc/dojump.h index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644 --- a/gcc/dojump.h +++ b/gcc/dojump.h @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label, extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *, profile_probability); +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree, + machine_mode, rtx, rtx_code_label *, + rtx_code_label *, profile_probability); + extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, machine_mode, rtx, rtx_code_label *, rtx_code_label *, profile_probability); diff --git a/gcc/dojump.cc b/gcc/dojump.cc index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644 --- a/gcc/dojump.cc +++ b/gcc/dojump.cc @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label, } do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)), NE, TYPE_UNSIGNED (TREE_TYPE (exp)), - GET_MODE (temp), NULL_RTX, + exp, GET_MODE (temp), NULL_RTX, if_false_label, if_true_label, prob); } @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, /* All but high-order word must be compared as unsigned. */ do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0), - word_mode, NULL_RTX, NULL, if_true_label, + NULL, word_mode, NULL_RTX, NULL, if_true_label, prob); /* Emit only one comparison for 0. Do not emit the last cond jump. */ @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, break; /* Consider lower words only if these are equal. 
*/ - do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode, - NULL_RTX, NULL, if_false_label, + do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL, + word_mode, NULL_RTX, NULL, if_false_label, prob.invert ()); } @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, if (part != 0) { - do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode, + do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, if_false_label, if_true_label, prob); return; } @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, for (i = 0; i < nwords; i++) do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), - const0_rtx, EQ, 1, word_mode, NULL_RTX, + const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, if_false_label, NULL, prob); if (if_true_label) @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1, for (i = 0; i < nwords; i++) do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), - operand_subword_force (op1, i, mode), - EQ, 0, word_mode, NULL_RTX, + operand_subword_force (op1, i, mode), + EQ, 0, NULL, word_mode, NULL_RTX, if_false_label, NULL, prob); if (if_true_label) @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, rtx_code_label *if_false_label, rtx_code_label *if_true_label, profile_probability prob) +{ + do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size, + if_false_label, if_true_label, prob); +} + +/* Like do_compare_and_jump but expects the values to compare as two rtx's. + The decision as to signed or unsigned comparison must be made by the caller. + + If MODE is BLKmode, SIZE is an RTX giving the size of the objects being + compared. 
*/ + +void +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + tree val, machine_mode mode, rtx size, + rtx_code_label *if_false_label, + rtx_code_label *if_true_label, + profile_probability prob) { rtx tem; rtx_code_label *dummy_label = NULL; @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, } else dest_label = if_false_label; - do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, - size, dest_label, NULL, first_prob); + + do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, + val, mode, size, dest_label, NULL, + first_prob); } /* For !and_them we want to split: if (x) goto t; // prob; @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, else { profile_probability first_prob = prob.split (cprob); - do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, - size, NULL, if_true_label, first_prob); + do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, + val, mode, size, NULL, + if_true_label, first_prob); if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump)) { /* x != y can be split into x unord y || x ltgt y @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, } } - emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, + emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val, if_true_label, prob); } @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code, op1 = new_op1; } - do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, - ((mode == BLKmode) - ? expr_size (treeop0) : NULL_RTX), + do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode, + ((mode == BLKmode) + ? 
expr_size (treeop0) : NULL_RTX), if_false_label, if_true_label, prob); } diff --git a/gcc/optabs.cc b/gcc/optabs.cc index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see #include "libfuncs.h" #include "internal-fn.h" #include "langhooks.h" +#include "gimple.h" +#include "ssa.h" static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *, machine_mode *); @@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode, static void emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, - profile_probability prob) + direct_optab cmp_optab, profile_probability prob) { machine_mode optab_mode; enum mode_class mclass; @@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, mclass = GET_MODE_CLASS (mode); optab_mode = (mclass == MODE_CC) ? CCmode : mode; - icode = optab_handler (cbranch_optab, optab_mode); + icode = optab_handler (cmp_optab, optab_mode); gcc_assert (icode != CODE_FOR_nothing); gcc_assert (insn_operand_matches (icode, 0, test)); @@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, add_reg_br_prob_note (insn, prob); } +/* Check to see if the supplied comparison in PTEST can be performed as a + bit-test-and-branch instead. VAL must contain the original tree + expression of the non-zero operand which will be used to rewrite the + comparison in PTEST. + + Returns TRUE if operation succeeds and returns updated PMODE and PTEST, + else FALSE. 
*/ + +enum insn_code +static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode) +{ + if (!val || TREE_CODE (val) != SSA_NAME) + return CODE_FOR_nothing; + + machine_mode mode = TYPE_MODE (TREE_TYPE (val)); + rtx test = *ptest; + + if (GET_CODE (test) != EQ && GET_CODE (test) != NE) + return CODE_FOR_nothing; + + /* If the target supports the testbit comparison directly, great. */ + auto icode = direct_optab_handler (tbranch_optab, mode); + if (icode == CODE_FOR_nothing) + return icode; + + if (tree_zero_one_valued_p (val)) + { + auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0; + XEXP (test, 1) = gen_int_mode (pos, mode); + *ptest = test; + *pmode = mode; + return icode; + } + + wide_int wcst = get_nonzero_bits (val); + if (wcst == -1) + return CODE_FOR_nothing; + + int bitpos; + + if ((bitpos = wi::exact_log2 (wcst)) == -1) + return CODE_FOR_nothing; + + auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos; + XEXP (test, 1) = gen_int_mode (pos, mode); + *ptest = test; + *pmode = mode; + return icode; +} + /* Generate code to compare X with Y so that the condition codes are set and to jump to LABEL if the condition is true. If X is a constant and Y is not a constant, then the comparison is swapped to @@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, It will be potentially converted into an unsigned variant based on UNSIGNEDP to select a proper jump instruction. - PROB is the probability of jumping to LABEL. */ + PROB is the probability of jumping to LABEL. If the comparison is against + zero then VAL contains the expression from which the non-zero RTL is + derived. 
*/ void emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, - machine_mode mode, int unsignedp, rtx label, + machine_mode mode, int unsignedp, tree val, rtx label, profile_probability prob) { rtx op0 = x, op1 = y; @@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN, &test, &mode); - emit_cmp_and_jump_insn_1 (test, mode, label, prob); + + /* Check if we're comparing a truth type with 0, and if so check if + the target supports tbranch. */ + machine_mode tmode = mode; + if (op1 == CONST0_RTX (GET_MODE (op1)) + && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing) + { + emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob); + return; + } + + emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob); } - +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown. */ + +void +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, + machine_mode mode, int unsignedp, rtx label, + profile_probability prob) +{ + emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL, + label, prob); +} + + /* Emit a library call comparison between floating point X and Y. COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.). 
*/ diff --git a/gcc/optabs.def b/gcc/optabs.def index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a") OPTAB_D (reload_out_optab, "reload_out$a") OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE) +OPTAB_D (tbranch_optab, "tbranch$a4") OPTAB_D (addcc_optab, "add$acc") OPTAB_D (negcc_optab, "neg$acc") OPTAB_D (notcc_optab, "not$acc") diff --git a/gcc/optabs.h b/gcc/optabs.h index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, machine_mode, int, rtx, profile_probability prob = profile_probability::uninitialized ()); +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, + machine_mode, int, tree, rtx, + profile_probability prob + = profile_probability::uninitialized ()); /* Generate code to indirectly jump to a location given in the rtx LOC. */ extern void emit_indirect_jump (rtx); diff --git a/gcc/tree.h b/gcc/tree.h index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree); extern tree signed_type_for (tree); extern tree unsigned_type_for (tree); extern bool is_truth_type_for (tree, tree); +extern bool tree_zero_one_valued_p (tree); extern tree truth_type_for (tree); extern tree build_pointer_type_for_mode (tree, machine_mode, bool); extern tree build_pointer_type (tree);