* PATCH: ffs/ctz expansions using clz
@ 2007-08-02 1:37 Sandra Loosemore
2007-08-02 6:25 ` Richard Sandiford
0 siblings, 1 reply; 7+ messages in thread
From: Sandra Loosemore @ 2007-08-02 1:37 UTC (permalink / raw)
To: GCC Patches; +Cc: Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 768 bytes --]
This patch originally started out life as being specific to the MIPS backend,
but Richard pointed out that it could be done in a target-independent way in
optabs.c. The idea is that if the target has clz support but not ffs and/or
ctz, you can use the former to implement the latter more efficiently than with a
library call.
This patch depends on CLZ_DEFINED_VALUE_AT_ZERO applying to the clz optab as
well as the clz rtl expression. I couldn't tell from reading the documentation
whether this is a change from its previous meaning or not, but I don't think
it's inconsistent with current practice in all the other back ends. I could add
a doc patch if folks think it is a change, or just needs clarification generally.
OK to commit otherwise?
-Sandra
[-- Attachment #2: 24-ffs-ctz.log --]
[-- Type: text/x-log, Size: 218 bytes --]
2007-08-01 Sandra Loosemore <sandra@codesourcery.com>
Nigel Stephens <nigel@mips.com>
gcc/
* optabs.c (expand_ffs, expand_ctz): New functions to compute
ffs and ctz using clz.
(expand_unop): Call them.
[-- Attachment #3: 24-ffs-ctz.patch --]
[-- Type: text/x-patch, Size: 3626 bytes --]
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c (revision 127069)
--- gcc/optabs.c (working copy)
*************** static void prepare_float_lib_cmp (rtx *
*** 122,127 ****
--- 122,129 ----
enum machine_mode *, int *);
static rtx widen_clz (enum machine_mode, rtx, rtx);
static rtx expand_parity (enum machine_mode, rtx, rtx);
+ static rtx expand_ffs (enum machine_mode, rtx, rtx);
+ static rtx expand_ctz (enum machine_mode, rtx, rtx);
static enum rtx_code get_rtx_code (enum tree_code, bool);
static rtx vector_compare_rtx (tree, bool, enum insn_code);
*************** expand_parity (enum machine_mode mode, r
*** 2560,2565 ****
--- 2562,2629 ----
return 0;
}
+ /* Try calculating ffs(x) using clz(x). Since the ffs builtin promises
+ to return zero for a zero value and clz may have an undefined value
+ in that case, only do this if we know clz returns the right thing so
+ that we don't have to generate a test and branch. */
+ static rtx
+ expand_ffs (enum machine_mode mode, rtx op0, rtx target)
+ {
+ HOST_WIDE_INT val;
+ if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val)
+ && val == GET_MODE_BITSIZE (mode))
+ {
+ rtx last = get_last_insn ();
+ rtx temp;
+
+ temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
+ true, OPTAB_DIRECT);
+ if (temp)
+ temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, sub_optab,
+ GEN_INT (GET_MODE_BITSIZE (mode)),
+ temp,
+ target, true, OPTAB_DIRECT);
+ if (temp == 0)
+ delete_insns_since (last);
+ return temp;
+ }
+ return 0;
+ }
+
+ /* We can compute ctz(x) using clz(x) with a similar recipe. Here the ctz
+ builtin has an undefined result on zero, just like clz, so we don't have
+ to do that check. */
+ static rtx
+ expand_ctz (enum machine_mode mode, rtx op0, rtx target)
+ {
+ if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
+ {
+ rtx last = get_last_insn ();
+ rtx temp;
+
+ temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
+ true, OPTAB_DIRECT);
+ if (temp)
+ temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, xor_optab, temp,
+ GEN_INT (GET_MODE_BITSIZE (mode) - 1),
+ target,
+ true, OPTAB_DIRECT);
+ if (temp == 0)
+ delete_insns_since (last);
+ return temp;
+ }
+ return 0;
+ }
+
/* Extract the OMODE lowpart from VAL, which has IMODE. Under certain
conditions, VAL may already be a SUBREG against which we cannot generate
a further SUBREG. In this case, we expect forcing the value into a
*************** expand_unop (enum machine_mode mode, opt
*** 2885,2890 ****
--- 2949,2970 ----
return temp;
}
+ /* Try implementing ffs (x) in terms of clz (x). */
+ if (unoptab == ffs_optab)
+ {
+ temp = expand_ffs (mode, op0, target);
+ if (temp)
+ return temp;
+ }
+
+ /* Try implementing ctz (x) in terms of clz (x). */
+ if (unoptab == ctz_optab)
+ {
+ temp = expand_ctz (mode, op0, target);
+ if (temp)
+ return temp;
+ }
+
try_libcall:
/* Now try a library call in this mode. */
if (unoptab->handlers[(int) mode].libfunc)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-02 1:37 PATCH: ffs/ctz expansions using clz Sandra Loosemore
@ 2007-08-02 6:25 ` Richard Sandiford
2007-08-08 19:11 ` Mark Mitchell
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2007-08-02 6:25 UTC (permalink / raw)
To: Sandra Loosemore; +Cc: GCC Patches
Sandra Loosemore <sandra@codesourcery.com> writes:
> This patch depends on CLZ_DEFINED_VALUE_AT_ZERO applying to the clz
> optab as well as the clz rtl expression. I couldn't tell from reading
> the documentation whether this is a change from its previous meaning
> or not, but I don't think it's inconsistent with current practice in
> all the other back ends.
It is a change. I was thinking the macro ought to be tristate:
defined always, defined for the clz rtl code only, or undefined.
Sorry for not making that clear.
(The patch looked good to me otherwise FWIW, although I obviously
can't approve it.)
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-02 6:25 ` Richard Sandiford
@ 2007-08-08 19:11 ` Mark Mitchell
2007-08-08 20:41 ` Andrew Pinski
2007-08-09 14:10 ` Sandra Loosemore
0 siblings, 2 replies; 7+ messages in thread
From: Mark Mitchell @ 2007-08-08 19:11 UTC (permalink / raw)
To: Sandra Loosemore, GCC Patches, richard
Richard Sandiford wrote:
> Sandra Loosemore <sandra@codesourcery.com> writes:
>> This patch depends on CLZ_DEFINED_VALUE_AT_ZERO applying to the clz
>> optab as well as the clz rtl expression. I couldn't tell from reading
>> the documentation whether this is a change from its previous meaning
>> or not, but I don't think it's inconsistent with current practice in
>> all the other back ends.
>
> It is a change. I was thinking the macro ought to be tristate:
> defined always, defined for the clz rtl code only, or undefined.
> Sorry for not making that clear.
All right. Let's do the following for CLZ_DEFINED_VALUE_DEFINED_AT_ZERO:
0 - Not defined at zero.
1 - Defined at zero for the RTL "clz" instruction.
2 - Defined at zero for both the RTL "clz" instruction and the optabs
"clz" expansion.
That's backwards-compatible, as the existing definitions are all
{0,1}-valued.
Sandra, please adjust the documentation in tm.texi to reflect the
possible value "2" and your patch to test for that. With those changes,
your patch is OK.
You will of course need to define the macro to "2" in the MIPS back end
to get the benefit of your patch. Given Richard's comments, that change
is OK as well, unless he indicates otherwise.
Thanks,
--
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-08 19:11 ` Mark Mitchell
@ 2007-08-08 20:41 ` Andrew Pinski
2007-08-08 20:48 ` Daniel Jacobowitz
2007-08-09 14:10 ` Sandra Loosemore
1 sibling, 1 reply; 7+ messages in thread
From: Andrew Pinski @ 2007-08-08 20:41 UTC (permalink / raw)
To: Mark Mitchell; +Cc: Sandra Loosemore, GCC Patches, richard
On 8/8/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Richard Sandiford wrote:
> > Sandra Loosemore <sandra@codesourcery.com> writes:
> >> This patch depends on CLZ_DEFINED_VALUE_AT_ZERO applying to the clz
> >> optab as well as the clz rtl expression. I couldn't tell from reading
> >> the documentation whether this is a change from its previous meaning
> >> or not, but I don't think it's inconsistent with current practice in
> >> all the other back ends.
> >
> > It is a change. I was thinking the macro ought to be tristate:
> > defined always, defined for the clz rtl code only, or undefined.
> > Sorry for not making that clear.
>
> All right. Let's do the following for CLZ_DEFINED_VALUE_DEFINED_AT_ZERO:
>
> 0 - Not defined at zero.
> 1 - Defined at zero for the RTL "clz" instruction.
> 2 - Defined at zero for both the RTL "clz" instruction and the optabs
> "clz" expansion.
>
> That's backwards-compatible, as the existing definitions are all
> {0,1}-valued.
Is that true, rs6000 defines it as:
/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
((VALUE) = ((MODE) == SImode ? 32 : 64))
Thanks,
Andrew Pinski
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-08 20:41 ` Andrew Pinski
@ 2007-08-08 20:48 ` Daniel Jacobowitz
2007-08-08 20:51 ` Mark Mitchell
0 siblings, 1 reply; 7+ messages in thread
From: Daniel Jacobowitz @ 2007-08-08 20:48 UTC (permalink / raw)
To: Andrew Pinski; +Cc: Mark Mitchell, Sandra Loosemore, GCC Patches, richard
On Wed, Aug 08, 2007 at 01:41:30PM -0700, Andrew Pinski wrote:
> > All right. Let's do the following for CLZ_DEFINED_VALUE_DEFINED_AT_ZERO:
> #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
Aren't those different macros?
--
Daniel Jacobowitz
CodeSourcery
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-08 20:48 ` Daniel Jacobowitz
@ 2007-08-08 20:51 ` Mark Mitchell
0 siblings, 0 replies; 7+ messages in thread
From: Mark Mitchell @ 2007-08-08 20:51 UTC (permalink / raw)
To: Andrew Pinski, Mark Mitchell, Sandra Loosemore, GCC Patches, richard
Daniel Jacobowitz wrote:
> On Wed, Aug 08, 2007 at 01:41:30PM -0700, Andrew Pinski wrote:
>>> All right. Let's do the following for CLZ_DEFINED_VALUE_DEFINED_AT_ZERO:
>
>> #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
>
> Aren't those different macros?
Only because I misspelled mine. Andrew's right; I misinterpreted what
the rs6000/rs6000.h macro is doing. So, Sandra, you will also need to
modify that implementation of the macro to return 1, in the same way
that, for example, ARM does.
Thanks,
--
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: PATCH: ffs/ctz expansions using clz
2007-08-08 19:11 ` Mark Mitchell
2007-08-08 20:41 ` Andrew Pinski
@ 2007-08-09 14:10 ` Sandra Loosemore
1 sibling, 0 replies; 7+ messages in thread
From: Sandra Loosemore @ 2007-08-09 14:10 UTC (permalink / raw)
To: Mark Mitchell; +Cc: GCC Patches, richard
[-- Attachment #1: Type: text/plain, Size: 281 bytes --]
Mark Mitchell wrote:
> Sandra, please adjust the documentation in tm.texi to reflect the
> possible value "2" and your patch to test for that. With those changes,
> your patch is OK.
OK, I've tested and committed the patch with that change. Here's the final version.
-Sandra
[-- Attachment #2: 24a-ffs-ctz.log --]
[-- Type: text/x-log, Size: 595 bytes --]
2007-08-09 Sandra Loosemore <sandra@codesourcery.com>
Nigel Stephens <nigel@mips.com>
gcc/
* doc/tm.texi (CLZ_DEFINED_VALUE_AT_ZERO, CTZ_DEFINED_VALUE_AT_ZERO):
Document change in interpretation of value from boolean to
tri-state integer.
* optabs.c (expand_ffs, expand_ctz): New functions to compute
ffs and ctz using clz.
(expand_unop): Call them.
* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Fix its
result value.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* config/mips/mips.h (CLZ_DEFINED_VALUE_AT_ZERO): Likewise, to
enable the new ffs expansion on this target.
[-- Attachment #3: 24a-ffs-ctz.patch --]
[-- Type: text/x-patch, Size: 7654 bytes --]
Index: gcc/doc/tm.texi
===================================================================
*** gcc/doc/tm.texi (revision 127304)
--- gcc/doc/tm.texi (working copy)
*************** given mode.
*** 9590,9603 ****
@defmac CLZ_DEFINED_VALUE_AT_ZERO (@var{mode}, @var{value})
@defmacx CTZ_DEFINED_VALUE_AT_ZERO (@var{mode}, @var{value})
! A C expression that evaluates to true if the architecture defines a value
! for @code{clz} or @code{ctz} with a zero operand. If so, @var{value}
! should be set to this value. If this macro is not defined, the value of
! @code{clz} or @code{ctz} is assumed to be undefined.
This macro must be defined if the target's expansion for @code{ffs}
relies on a particular value to get correct results. Otherwise it
! is not necessary, though it may be used to optimize some corner cases.
Note that regardless of this macro the ``definedness'' of @code{clz}
and @code{ctz} at zero do @emph{not} extend to the builtin functions
--- 9590,9612 ----
@defmac CLZ_DEFINED_VALUE_AT_ZERO (@var{mode}, @var{value})
@defmacx CTZ_DEFINED_VALUE_AT_ZERO (@var{mode}, @var{value})
! A C expression that indicates whether the architecture defines a value
! for @code{clz} or @code{ctz} with a zero operand.
! A result of @code{0} indicates the value is undefined.
! If the value is defined for only the RTL expression, the macro should
! evaluate to @code{1}; if the value applies also to the corresponding optab
! entry (which is normally the case if it expands directly into
! the corresponding RTL), then the macro should evaluate to @code{2}.
! In the cases where the value is defined, @var{value} should be set to
! this value.
!
! If this macro is not defined, the value of @code{clz} or
! @code{ctz} at zero is assumed to be undefined.
This macro must be defined if the target's expansion for @code{ffs}
relies on a particular value to get correct results. Otherwise it
! is not necessary, though it may be used to optimize some corner cases, and
! to provide a default expansion for the @code{ffs} optab.
Note that regardless of this macro the ``definedness'' of @code{clz}
and @code{ctz} at zero do @emph{not} extend to the builtin functions
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c (revision 127304)
--- gcc/optabs.c (working copy)
*************** static void prepare_float_lib_cmp (rtx *
*** 122,127 ****
--- 122,129 ----
enum machine_mode *, int *);
static rtx widen_clz (enum machine_mode, rtx, rtx);
static rtx expand_parity (enum machine_mode, rtx, rtx);
+ static rtx expand_ffs (enum machine_mode, rtx, rtx);
+ static rtx expand_ctz (enum machine_mode, rtx, rtx);
static enum rtx_code get_rtx_code (enum tree_code, bool);
static rtx vector_compare_rtx (tree, bool, enum insn_code);
*************** expand_parity (enum machine_mode mode, r
*** 2560,2565 ****
--- 2562,2629 ----
return 0;
}
+ /* Try calculating ffs(x) using clz(x). Since the ffs builtin promises
+ to return zero for a zero value and clz may have an undefined value
+ in that case, only do this if we know clz returns the right thing so
+ that we don't have to generate a test and branch. */
+ static rtx
+ expand_ffs (enum machine_mode mode, rtx op0, rtx target)
+ {
+ HOST_WIDE_INT val;
+ if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2
+ && val == GET_MODE_BITSIZE (mode))
+ {
+ rtx last = get_last_insn ();
+ rtx temp;
+
+ temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
+ true, OPTAB_DIRECT);
+ if (temp)
+ temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, sub_optab,
+ GEN_INT (GET_MODE_BITSIZE (mode)),
+ temp,
+ target, true, OPTAB_DIRECT);
+ if (temp == 0)
+ delete_insns_since (last);
+ return temp;
+ }
+ return 0;
+ }
+
+ /* We can compute ctz(x) using clz(x) with a similar recipe. Here the ctz
+ builtin has an undefined result on zero, just like clz, so we don't have
+ to do that check. */
+ static rtx
+ expand_ctz (enum machine_mode mode, rtx op0, rtx target)
+ {
+ if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
+ {
+ rtx last = get_last_insn ();
+ rtx temp;
+
+ temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
+ true, OPTAB_DIRECT);
+ if (temp)
+ temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
+ if (temp)
+ temp = expand_binop (mode, xor_optab, temp,
+ GEN_INT (GET_MODE_BITSIZE (mode) - 1),
+ target,
+ true, OPTAB_DIRECT);
+ if (temp == 0)
+ delete_insns_since (last);
+ return temp;
+ }
+ return 0;
+ }
+
/* Extract the OMODE lowpart from VAL, which has IMODE. Under certain
conditions, VAL may already be a SUBREG against which we cannot generate
a further SUBREG. In this case, we expect forcing the value into a
*************** expand_unop (enum machine_mode mode, opt
*** 2885,2890 ****
--- 2949,2970 ----
return temp;
}
+ /* Try implementing ffs (x) in terms of clz (x). */
+ if (unoptab == ffs_optab)
+ {
+ temp = expand_ffs (mode, op0, target);
+ if (temp)
+ return temp;
+ }
+
+ /* Try implementing ctz (x) in terms of clz (x). */
+ if (unoptab == ctz_optab)
+ {
+ temp = expand_ctz (mode, op0, target);
+ if (temp)
+ return temp;
+ }
+
try_libcall:
/* Now try a library call in this mode. */
if (unoptab->handlers[(int) mode].libfunc)
Index: gcc/config/rs6000/rs6000.h
===================================================================
*** gcc/config/rs6000/rs6000.h (revision 127304)
--- gcc/config/rs6000/rs6000.h (working copy)
*************** do { \
*** 1856,1865 ****
/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
! ((VALUE) = ((MODE) == SImode ? 32 : 64))
/* The CTZ patterns return -1 for input of zero. */
! #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1)
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction
--- 1856,1865 ----
/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
! ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
/* The CTZ patterns return -1 for input of zero. */
! #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1)
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction
Index: gcc/config/mips/mips.h
===================================================================
*** gcc/config/mips/mips.h (revision 127304)
--- gcc/config/mips/mips.h (working copy)
*************** extern enum mips_code_readable_setting m
*** 1295,1301 ****
/* The [d]clz instructions have the natural values at 0. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
! ((VALUE) = GET_MODE_BITSIZE (MODE), true)
\f
/* Standard register usage. */
--- 1295,1301 ----
/* The [d]clz instructions have the natural values at 0. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
! ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
\f
/* Standard register usage. */
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2007-08-09 14:10 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-02 1:37 PATCH: ffs/ctz expansions using clz Sandra Loosemore
2007-08-02 6:25 ` Richard Sandiford
2007-08-08 19:11 ` Mark Mitchell
2007-08-08 20:41 ` Andrew Pinski
2007-08-08 20:48 ` Daniel Jacobowitz
2007-08-08 20:51 ` Mark Mitchell
2007-08-09 14:10 ` Sandra Loosemore
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).