* [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math
@ 2011-08-08 19:52 Uros Bizjak
2011-08-09 9:36 ` Richard Guenther
2011-08-09 20:11 ` [PATCH v2, " Uros Bizjak
0 siblings, 2 replies; 4+ messages in thread
From: Uros Bizjak @ 2011-08-08 19:52 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Guenther
[-- Attachment #1: Type: text/plain, Size: 989 bytes --]
Hello!
One notable omission from x87 rounding sequences is plain round(),
since x87 does not support round-to-nearest, away-from-zero directly.
However, this function can be synthesized as round(a) = sgn(a) *
floor(fabs(a) + 0.5). Fortunately, we have all building blocks ready
in i386.md, we should just wire them correctly.
The x87 expansion is protected with flag_unsafe_math_optimizations,
since the input range is reduced (true?) due to addition - OTOH, insn
that calculates floor is protected by this flag anyway.
2011-08-08 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_emit_i387_round): New function.
* config/i386/i386-protos.h (ix86_emit_i387_round): Declare.
* config/i386/i386.md (round<mode>2): Use X87MODEF mode iterator.
Use ix86_emit_i387_round to expand round function for i387 math.
Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu.
It also computes correctly +-NaN, +-Inf and all inputs I have thrown
in.
Any comments?
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 4322 bytes --]
Index: i386.md
===================================================================
--- i386.md (revision 177572)
+++ i386.md (working copy)
@@ -14362,17 +14396,31 @@
})
(define_expand "round<mode>2"
- [(match_operand:MODEF 0 "register_operand" "")
- (match_operand:MODEF 1 "nonimmediate_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
- && !flag_trapping_math && !flag_rounding_math"
+ [(match_operand:X87MODEF 0 "register_operand" "")
+ (match_operand:X87MODEF 1 "nonimmediate_operand" "")]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math)"
{
if (optimize_insn_for_size_p ())
FAIL;
- if (TARGET_64BIT || (<MODE>mode != DFmode))
- ix86_expand_round (operand0, operand1);
+
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math)
+ {
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_round (operands[0], operands[1]);
+ else
+ ix86_expand_rounddf_32 (operands[0], operands[1]);
+ }
else
- ix86_expand_rounddf_32 (operand0, operand1);
+ {
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_emit_i387_round (operands[0], operands[1]);
+ }
DONE;
})
Index: i386-protos.h
===================================================================
--- i386-protos.h (revision 177572)
+++ i386-protos.h (working copy)
@@ -163,6 +163,7 @@ extern void x86_emit_floatuns (rtx [2]);
extern void ix86_emit_fp_unordered_jump (rtx);
extern void ix86_emit_i387_log1p (rtx, rtx);
+extern void ix86_emit_i387_round (rtx, rtx);
extern void ix86_emit_swdivsf (rtx, rtx, rtx, enum machine_mode);
extern void ix86_emit_swsqrtsf (rtx, rtx, enum machine_mode, bool);
Index: i386.c
===================================================================
--- i386.c (revision 177572)
+++ i386.c (working copy)
@@ -31722,6 +31729,76 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
emit_label (label2);
}
+/* Emit code for round calculation. */
+void ix86_emit_i387_round (rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx e1, e2, res, tmp0, tmp1, half;
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ rtx jump_label = gen_label_rtx ();
+ rtx insn;
+
+ e1 = gen_reg_rtx (mode);
+ e2 = gen_reg_rtx (mode);
+ res = gen_reg_rtx (mode);
+
+ half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, mode);
+
+ /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
+
+ /* scratch = fxam(op1) */
+ emit_insn (gen_rtx_SET (VOIDmode, scratch,
+ gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
+ UNSPEC_FXAM)));
+ /* e1 = fabs(op1) */
+ emit_insn (gen_rtx_SET (VOIDmode, e1,
+ gen_rtx_ABS (mode, op1)));
+
+ /* e2 = e1 + 0.5 */
+ half = force_reg (mode, half);
+ emit_insn (gen_rtx_SET (VOIDmode, e2,
+ gen_rtx_PLUS (mode, e1, half)));
+
+ /* res = floor(e2) */
+ if (mode != XFmode)
+ {
+ tmp0 = gen_reg_rtx (XFmode);
+ tmp1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp1,
+ gen_rtx_FLOAT_EXTEND (XFmode, e2)));
+ }
+ else
+ tmp0 = res, tmp1 = e2;
+
+ emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
+
+ if (mode != XFmode)
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, tmp0),
+ UNSPEC_TRUNC_NOOP)));
+ /* flags = signbit(a) */
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
+
+ /* if (flags) then res = -res */
+ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, jump_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ JUMP_LABEL (insn) = jump_label;
+
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_NEG (mode, res)));
+
+ emit_label (jump_label);
+ LABEL_NUSES (jump_label) = 1;
+
+ emit_move_insn (op0, res);
+}
+
/* Output code to perform a Newton-Rhapson approximation of a single precision
floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math
2011-08-08 19:52 [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math Uros Bizjak
@ 2011-08-09 9:36 ` Richard Guenther
2011-08-09 9:55 ` Uros Bizjak
2011-08-09 20:11 ` [PATCH v2, " Uros Bizjak
1 sibling, 1 reply; 4+ messages in thread
From: Richard Guenther @ 2011-08-09 9:36 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Mon, 8 Aug 2011, Uros Bizjak wrote:
> Hello!
>
> One notable omission from x87 rounding sequences is plain round(),
> since x87 does not support round-to-nearest, away-from-zero directly.
> However, this function can be synthesized as round(a) = sgn(a) *
> floor(fabs(a) + 0.5). Fortunately, we have all building blocks ready
> in i386.md, we should just wire them correctly.
>
> The x87 expansion is protected with flag_unsafe_math_optimizations,
> since the input range is reduced (true?) due to addition - OTOH, insn
> that calculates floor is protected by this flag anyway.
>
> 2011-08-08 Uros Bizjak <ubizjak@gmail.com>
>
> * config/i386/i386.c (ix86_emit_i387_round): New function.
> * config/i386/i386-protos.h (ix86_emit_i387_round): Declare.
> * config/i386/i386.md (round<mode>2): Use X87MODEF mode iterator.
> Use ix86_emit_i387_round to expand round function for i387 math.
>
> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu.
> It also computes correctly +-NaN, +-Inf and all inputs I have thrown
> in.
>
> Any comments?
Can't you do sth similar to ix86_expand_round instead,
using trunc (fabs (a) + nextafter (0.5, 0.0)) * sgn(a)
(not sure with the sign multiply, copysign would be better)?
The addition doesn't really reduce the range if you guard the
round by a proper conditional like ix86_expand_round does.
Richard.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math
2011-08-09 9:36 ` Richard Guenther
@ 2011-08-09 9:55 ` Uros Bizjak
0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2011-08-09 9:55 UTC (permalink / raw)
To: Richard Guenther; +Cc: gcc-patches
On Tue, Aug 9, 2011 at 10:21 AM, Richard Guenther <rguenther@suse.de> wrote:
>> One notable omission from x87 rounding sequences is plain round(),
>> since x87 does not support round-to-nearest, away-from-zero directly.
>> However, this function can be synthesized as round(a) = sgn(a) *
>> floor(fabs(a) + 0.5). Fortunately, we have all building blocks ready
>> in i386.md, we should just wire them correctly.
>>
>> The x87 expansion is protected with flag_unsafe_math_optimizations,
>> since the input range is reduced (true?) due to addition - OTOH, insn
>> that calculates floor is protected by this flag anyway.
> Can't you do sth similar to ix86_expand_round instead,
> using trunc (fabs (a) + nextafter (0.5, 0.0)) * sgn(a)
> (not sure with the sign multiply, copysign would be better)?
No, you can't use nextafter due to inherent x87 XFmode extensions, and
the sign multiply is a simple conditional change-sign with chs insn
(the intermediate result is always positive at this point).
> The addition doesn't really reduce the range if you guard the
> round by a proper conditional like ix86_expand_round does.
Do we need both, flag_trapping_math and flag_rounding_math in x87
case? The sequence _will_ trap due to frndint instruction, and will
set correct rounding mode for frndint in any case.
Uros.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math
2011-08-08 19:52 [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math Uros Bizjak
2011-08-09 9:36 ` Richard Guenther
@ 2011-08-09 20:11 ` Uros Bizjak
1 sibling, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2011-08-09 20:11 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Guenther
[-- Attachment #1: Type: text/plain, Size: 966 bytes --]
Hello!
> One notable omission from x87 rounding sequences is plain round(),
> since x87 does not support round-to-nearest, away-from-zero directly.
> However, this function can be synthesized as round(a) = sgn(a) *
> floor(fabs(a) + 0.5). Fortunately, we have all building blocks ready
> in i386.md, we should just wire them correctly.
Attached patch also handles {l,ll}round using fistp x87 insn.
2011-08-08 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_emit_i387_round): New function.
* config/i386/i386-protos.h (ix86_emit_i387_round): Declare.
* config/i386/i386.md (round<mode>2): Use X87MODEF mode iterator.
Use ix86_emit_i387_round to expand round function for i387 math.
(lround<X87MODEF:mode><SWI248x:mode>2): Use X87MODEF mode iterator.
Use ix86_emit_i387_round to expand {l,ll}round function for i387 math.
Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.
Patch was committed to mainline SVN.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 7026 bytes --]
Index: i386.md
===================================================================
--- i386.md (revision 177604)
+++ i386.md (working copy)
@@ -14396,17 +14396,31 @@
})
(define_expand "round<mode>2"
- [(match_operand:MODEF 0 "register_operand" "")
- (match_operand:MODEF 1 "nonimmediate_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
- && !flag_trapping_math && !flag_rounding_math"
+ [(match_operand:X87MODEF 0 "register_operand" "")
+ (match_operand:X87MODEF 1 "nonimmediate_operand" "")]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math)"
{
if (optimize_insn_for_size_p ())
FAIL;
- if (TARGET_64BIT || (<MODE>mode != DFmode))
- ix86_expand_round (operand0, operand1);
+
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math)
+ {
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_round (operands[0], operands[1]);
+ else
+ ix86_expand_rounddf_32 (operands[0], operands[1]);
+ }
else
- ix86_expand_rounddf_32 (operand0, operand1);
+ {
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_emit_i387_round (operands[0], operands[1]);
+ }
DONE;
})
@@ -14542,16 +14556,31 @@
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT)")
-(define_expand "lround<MODEF:mode><SWI48x:mode>2"
- [(match_operand:SWI48x 0 "nonimmediate_operand" "")
- (match_operand:MODEF 1 "register_operand" "")]
- "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
- && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT)
- && !flag_trapping_math && !flag_rounding_math"
+(define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
+ [(match_operand:SWI248x 0 "nonimmediate_operand" "")
+ (match_operand:X87MODEF 1 "register_operand" "")]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+ && <SWI248x:MODE>mode != HImode
+ && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
+ && !flag_trapping_math && !flag_rounding_math)"
{
if (optimize_insn_for_size_p ())
FAIL;
- ix86_expand_lround (operand0, operand1);
+
+ if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+ && <SWI248x:MODE>mode != HImode
+ && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
+ && !flag_trapping_math && !flag_rounding_math)
+ ix86_expand_lround (operand0, operand1);
+ else
+ {
+ operands[0] = force_reg (<SWI248x:MODE>mode, operands[0]);
+ ix86_emit_i387_round (operands[0], operands[1]);
+ }
DONE;
})
Index: i386-protos.h
===================================================================
--- i386-protos.h (revision 177604)
+++ i386-protos.h (working copy)
@@ -163,6 +163,7 @@ extern void x86_emit_floatuns (rtx [2]);
extern void ix86_emit_fp_unordered_jump (rtx);
extern void ix86_emit_i387_log1p (rtx, rtx);
+extern void ix86_emit_i387_round (rtx, rtx);
extern void ix86_emit_swdivsf (rtx, rtx, rtx, enum machine_mode);
extern void ix86_emit_swsqrtsf (rtx, rtx, enum machine_mode, bool);
Index: i386.c
===================================================================
--- i386.c (revision 177604)
+++ i386.c (working copy)
@@ -31731,6 +31731,139 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
emit_label (label2);
}
+/* Emit code for round calculation. */
+void ix86_emit_i387_round (rtx op0, rtx op1)
+{
+ enum machine_mode inmode = GET_MODE (op1);
+ enum machine_mode outmode = GET_MODE (op0);
+ rtx e1, e2, res, tmp, tmp1, half;
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ rtx jump_label = gen_label_rtx ();
+ rtx insn;
+ rtx (*gen_abs) (rtx, rtx);
+ rtx (*gen_neg) (rtx, rtx);
+
+ switch (inmode)
+ {
+ case SFmode:
+ gen_abs = gen_abssf2;
+ break;
+ case DFmode:
+ gen_abs = gen_absdf2;
+ break;
+ case XFmode:
+ gen_abs = gen_absxf2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (outmode)
+ {
+ case SFmode:
+ gen_neg = gen_negsf2;
+ break;
+ case DFmode:
+ gen_neg = gen_negdf2;
+ break;
+ case XFmode:
+ gen_neg = gen_negxf2;
+ break;
+ case HImode:
+ gen_neg = gen_neghi2;
+ break;
+ case SImode:
+ gen_neg = gen_negsi2;
+ break;
+ case DImode:
+ gen_neg = gen_negdi2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ e1 = gen_reg_rtx (inmode);
+ e2 = gen_reg_rtx (inmode);
+ res = gen_reg_rtx (outmode);
+
+ half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
+
+ /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
+
+ /* scratch = fxam(op1) */
+ emit_insn (gen_rtx_SET (VOIDmode, scratch,
+ gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
+ UNSPEC_FXAM)));
+ /* e1 = fabs(op1) */
+ emit_insn (gen_abs (e1, op1));
+
+ /* e2 = e1 + 0.5 */
+ half = force_reg (inmode, half);
+ emit_insn (gen_rtx_SET (VOIDmode, e2,
+ gen_rtx_PLUS (inmode, e1, half)));
+
+ /* res = floor(e2) */
+ if (inmode != XFmode)
+ {
+ tmp1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp1,
+ gen_rtx_FLOAT_EXTEND (XFmode, e2)));
+ }
+ else
+ tmp1 = e2;
+
+ switch (outmode)
+ {
+ case SFmode:
+ case DFmode:
+ {
+ rtx tmp0 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
+
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
+ UNSPEC_TRUNC_NOOP)));
+ }
+ break;
+ case XFmode:
+ emit_insn (gen_frndintxf2_floor (res, tmp1));
+ break;
+ case HImode:
+ emit_insn (gen_lfloorxfhi2 (res, tmp1));
+ break;
+ case SImode:
+ emit_insn (gen_lfloorxfsi2 (res, tmp1));
+ break;
+ case DImode:
+ emit_insn (gen_lfloorxfdi2 (res, tmp1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* flags = signbit(a) */
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
+
+ /* if (flags) then res = -res */
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, jump_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ JUMP_LABEL (insn) = jump_label;
+
+ emit_insn (gen_neg (res, res));
+
+ emit_label (jump_label);
+ LABEL_NUSES (jump_label) = 1;
+
+ emit_move_insn (op0, res);
+}
+
/* Output code to perform a Newton-Rhapson approximation of a single precision
floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-08-09 17:57 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-08 19:52 [PATCH, i386]: Expand round(a) as sgn(a) * floor(fabs(a) + 0.5) for x87 math Uros Bizjak
2011-08-09 9:36 ` Richard Guenther
2011-08-09 9:55 ` Uros Bizjak
2011-08-09 20:11 ` [PATCH v2, " Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).