From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2153) id 7FA3C3858C62; Thu, 8 Jun 2023 08:13:57 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7FA3C3858C62 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1686212037; bh=7IHSFVvmEN62GOzn0ttg+OVYx+6WNAhiehH9NHNlqo0=; h=From:To:Subject:Date:From; b=v6ne5XTZgaPIoE8IlpepIljEHA5vSpji63HekFchm47NNI6iGMFYj5SwBkrn3M48L xVzXtCe1pKQhehh1aWQ9MlLpRGsLFi/8QkwedfJ2DvsWW/rqD0aHX4lrmK9G1HKNfH M1ihpThhT5A4O7OBfVWcL783wPfMxGobUt0vkgCY= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jakub Jelinek To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-1631] optabs: Implement double-word ctz and ffs expansion X-Act-Checkin: gcc X-Git-Author: Jakub Jelinek X-Git-Refname: refs/heads/master X-Git-Oldrev: 2b2bf793d3fb8980cb2b83e9e2a8c236ad2434f5 X-Git-Newrev: fcbef8e431c246c1f584537a08d58fcd5c5a1831 Message-Id: <20230608081357.7FA3C3858C62@sourceware.org> Date: Thu, 8 Jun 2023 08:13:57 +0000 (GMT) List-Id: https://gcc.gnu.org/g:fcbef8e431c246c1f584537a08d58fcd5c5a1831 commit r14-1631-gfcbef8e431c246c1f584537a08d58fcd5c5a1831 Author: Jakub Jelinek Date: Thu Jun 8 10:13:23 2023 +0200 optabs: Implement double-word ctz and ffs expansion We have expand_doubleword_clz for a couple of years, where we emit double-word CLZ as if (high_word == 0) return CLZ (low_word) + word_size; else return CLZ (high_word); We can do something similar for CTZ and FFS IMHO, just with the 2 words swapped. So if (low_word == 0) return CTZ (high_word) + word_size; else return CTZ (low_word); for CTZ and if (low_word == 0) { return high_word ? FFS (high_word) + word_size : 0; else return FFS (low_word); The following patch implements that. Note, on some targets which implement both word_mode ctz and ffs patterns, it might be better to incrementally implement those double-word ffs expansion patterns in md files, because we aren't able to optimize it correctly; nothing can detect we have just made sure that argument is not 0 and so don't need to bother with handling that case. So, on ia32 just using CTZ patterns would be better there, but I think we can even do better and instead of doing the comparisons of the operands against 0 do the CTZ expansion followed by testing of flags. 2023-06-08 Jakub Jelinek * optabs.cc (expand_ffs): Add forward declaration. (expand_doubleword_clz): Rename to ... (expand_doubleword_clz_ctz_ffs): ... this. Add UNOPTAB argument, handle also doubleword CTZ and FFS in addition to CLZ. (expand_unop): Adjust caller. Also call it for doubleword ctz_optab and ffs_optab. * gcc.target/i386/ctzll-1.c: New test. * gcc.target/i386/ffsll-1.c: New test. Diff: --- gcc/optabs.cc | 63 ++++++++++++++++++++++++++++----- gcc/testsuite/gcc.target/i386/ctzll-1.c | 9 +++++ gcc/testsuite/gcc.target/i386/ffsll-1.c | 9 +++++ 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/gcc/optabs.cc b/gcc/optabs.cc index aab6ab6faf2..89ab55b72ae 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -2697,10 +2697,14 @@ expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target) return temp; } -/* Try calculating clz of a double-word quantity as two clz's of word-sized - quantities, choosing which based on whether the high word is nonzero. */ +static rtx expand_ffs (scalar_int_mode, rtx, rtx); + +/* Try calculating clz, ctz or ffs of a double-word quantity as two clz, ctz or + ffs operations on word-sized quantities, choosing which based on whether the + high (for clz) or low (for ctz and ffs) word is nonzero. */ static rtx -expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) +expand_doubleword_clz_ctz_ffs (scalar_int_mode mode, rtx op0, rtx target, + optab unoptab) { rtx xop0 = force_reg (mode, op0); rtx subhi = gen_highpart (word_mode, xop0); @@ -2709,6 +2713,7 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) rtx_code_label *after_label = gen_label_rtx (); rtx_insn *seq; rtx temp, result; + int addend = 0; /* If we were not given a target, use a word_mode register, not a 'mode' register. The result will fit, and nobody is expecting @@ -2721,6 +2726,9 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) 'target' to tag a REG_EQUAL note on. */ result = gen_reg_rtx (word_mode); + if (unoptab != clz_optab) + std::swap (subhi, sublo); + start_sequence (); /* If the high word is not equal to zero, @@ -2728,7 +2736,13 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0, word_mode, true, hi0_label); - temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true); + if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing) + temp = expand_unop_direct (word_mode, unoptab, subhi, result, true); + else + { + gcc_assert (unoptab == ffs_optab); + temp = expand_ffs (word_mode, subhi, result); + } if (!temp) goto fail; @@ -2739,14 +2753,32 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) emit_barrier (); /* Else clz of the full value is clz of the low word plus the number - of bits in the high word. */ + of bits in the high word. Similarly for ctz/ffs of the high word, + except that ffs should be 0 when both words are zero. */ emit_label (hi0_label); - temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true); + if (unoptab == ffs_optab) + { + convert_move (result, const0_rtx, true); + emit_cmp_and_jump_insns (sublo, CONST0_RTX (word_mode), EQ, 0, + word_mode, true, after_label); + } + + if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing) + temp = expand_unop_direct (word_mode, unoptab, sublo, NULL_RTX, true); + else + { + gcc_assert (unoptab == ffs_optab); + temp = expand_unop_direct (word_mode, ctz_optab, sublo, NULL_RTX, true); + addend = 1; + } + if (!temp) goto fail; + temp = expand_binop (word_mode, add_optab, temp, - gen_int_mode (GET_MODE_BITSIZE (word_mode), word_mode), + gen_int_mode (GET_MODE_BITSIZE (word_mode) + addend, + word_mode), result, true, OPTAB_DIRECT); if (!temp) goto fail; @@ -2759,7 +2791,7 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target) seq = get_insns (); end_sequence (); - add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode); + add_equal_note (seq, target, optab_to_code (unoptab), xop0, NULL_RTX, mode); emit_insn (seq); return target; @@ -3252,7 +3284,8 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD && optab_handler (unoptab, word_mode) != CODE_FOR_nothing) { - temp = expand_doubleword_clz (int_mode, op0, target); + temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, + unoptab); if (temp) return temp; } @@ -3500,6 +3533,18 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, return temp; } + if ((unoptab == ctz_optab || unoptab == ffs_optab) + && optimize_insn_for_speed_p () + && is_a (mode, &int_mode) + && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD + && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing + || optab_handler (ctz_optab, word_mode) != CODE_FOR_nothing)) + { + temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, unoptab); + if (temp) + return temp; + } + try_libcall: /* Now try a library call in this mode. */ libfunc = optab_libfunc (unoptab, mode); diff --git a/gcc/testsuite/gcc.target/i386/ctzll-1.c b/gcc/testsuite/gcc.target/i386/ctzll-1.c new file mode 100644 index 00000000000..7e75b81c323 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/ctzll-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "__ctzdi2" } } */ + +int +foo (unsigned long long x) +{ + return __builtin_ctzll (x); +} diff --git a/gcc/testsuite/gcc.target/i386/ffsll-1.c b/gcc/testsuite/gcc.target/i386/ffsll-1.c new file mode 100644 index 00000000000..b7003ebc005 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/ffsll-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "__ffsdi2" } } */ + +int +foo (unsigned long long x) +{ + return __builtin_ffsll (x); +}