* [arm] Improve longlong.h umul_ppmm, count_trailing_zeros
@ 2012-01-31 5:15 Richard Henderson
2012-02-01 13:24 ` Richard Earnshaw
0 siblings, 1 reply; 3+ messages in thread
From: Richard Henderson @ 2012-01-31 5:15 UTC (permalink / raw)
To: GCC Patches
I noticed this accidentally, while looking for something else.
There are significant improvements in the DImode multiplication
and division routines for armv4+.
Despite how trivial this is, I assume this must wait for stage1.
Ok?
r~
* longlong.h [arm] (umul_ppmm): Use umull.
[arm] (count_trailing_zeros): Use __builtin_ctz.
diff --git a/libgcc/longlong.h b/libgcc/longlong.h
index 30cc2e3..7204679 100644
--- a/libgcc/longlong.h
+++ b/libgcc/longlong.h
@@ -220,9 +220,12 @@ UDItype __umulsidi3 (USItype, USItype);
"rI" ((USItype) (bh)), \
"r" ((USItype) (al)), \
"rI" ((USItype) (bl)) __CLOBBER_CC)
-#define umul_ppmm(xh, xl, a, b) \
-{register USItype __t0, __t1, __t2; \
- __asm__ ("%@ Inlined umul_ppmm\n" \
+# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
+ || defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
+# define umul_ppmm(xh, xl, a, b) \
+ do { \
+ register USItype __t0, __t1, __t2; \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
" mov %2, %5, lsr #16\n" \
" mov %0, %6, lsr #16\n" \
" bic %3, %5, %2, lsl #16\n" \
@@ -239,14 +242,26 @@ UDItype __umulsidi3 (USItype, USItype);
"=r" ((USItype) (xl)), \
"=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
: "r" ((USItype) (a)), \
- "r" ((USItype) (b)) __CLOBBER_CC );}
-#define UMUL_TIME 20
-#define UDIV_TIME 100
+ "r" ((USItype) (b)) __CLOBBER_CC ); \
+ } while (0)
+# define UMUL_TIME 20
+# else
+# define umul_ppmm(xh, xl, a, b) \
+ do { \
+ /* Generate umull, under compiler control. */ \
+ register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \
+ (xl) = (USItype)__t0; \
+ (xh) = (USItype)(__t0 >> 32); \
+ } while (0)
+# define UMUL_TIME 3
+# endif
+# define UDIV_TIME 100
#endif /* __arm__ */
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros. */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [arm] Improve longlong.h umul_ppmm, count_trailing_zeros
2012-01-31 5:15 [arm] Improve longlong.h umul_ppmm, count_trailing_zeros Richard Henderson
@ 2012-02-01 13:24 ` Richard Earnshaw
2012-03-20 14:56 ` Richard Earnshaw
0 siblings, 1 reply; 3+ messages in thread
From: Richard Earnshaw @ 2012-02-01 13:24 UTC (permalink / raw)
To: Richard Henderson; +Cc: GCC Patches
On 31/01/12 05:15, Richard Henderson wrote:
> I noticed this accidentally, while looking for something else.
> There are significant improvements in the DImode multiplication
> and division routines for armv4+.
>
> Despite how trivial this is, I assume this must wait for stage1.
> Ok?
>
>
> r~
>
>
> * longlong.h [arm] (umul_ppmm): Use umull.
> [arm] (count_trailing_zeros): Use __builtin_ctz.
armv3m also has the widening multiply operation (it's what the M stands
for).
Otherwise ok for stage1
R.
>
> diff --git a/libgcc/longlong.h b/libgcc/longlong.h
> index 30cc2e3..7204679 100644
> --- a/libgcc/longlong.h
> +++ b/libgcc/longlong.h
> @@ -220,9 +220,12 @@ UDItype __umulsidi3 (USItype, USItype);
> "rI" ((USItype) (bh)), \
> "r" ((USItype) (al)), \
> "rI" ((USItype) (bl)) __CLOBBER_CC)
> -#define umul_ppmm(xh, xl, a, b) \
> -{register USItype __t0, __t1, __t2; \
> - __asm__ ("%@ Inlined umul_ppmm\n" \
> +# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
> + || defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
> +# define umul_ppmm(xh, xl, a, b) \
> + do { \
> + register USItype __t0, __t1, __t2; \
> + __asm__ ("%@ Inlined umul_ppmm\n" \
> " mov %2, %5, lsr #16\n" \
> " mov %0, %6, lsr #16\n" \
> " bic %3, %5, %2, lsl #16\n" \
> @@ -239,14 +242,26 @@ UDItype __umulsidi3 (USItype, USItype);
> "=r" ((USItype) (xl)), \
> "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
> : "r" ((USItype) (a)), \
> - "r" ((USItype) (b)) __CLOBBER_CC );}
> -#define UMUL_TIME 20
> -#define UDIV_TIME 100
> + "r" ((USItype) (b)) __CLOBBER_CC ); \
> + } while (0)
> +# define UMUL_TIME 20
> +# else
> +# define umul_ppmm(xh, xl, a, b) \
> + do { \
> + /* Generate umull, under compiler control. */ \
> + register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \
> + (xl) = (USItype)__t0; \
> + (xh) = (USItype)(__t0 >> 32); \
> + } while (0)
> +# define UMUL_TIME 3
> +# endif
> +# define UDIV_TIME 100
> #endif /* __arm__ */
>
> #if defined(__arm__)
> /* Let gcc decide how best to implement count_leading_zeros. */
> #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
> +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
> #define COUNT_LEADING_ZEROS_0 32
> #endif
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [arm] Improve longlong.h umul_ppmm, count_trailing_zeros
2012-02-01 13:24 ` Richard Earnshaw
@ 2012-03-20 14:56 ` Richard Earnshaw
0 siblings, 0 replies; 3+ messages in thread
From: Richard Earnshaw @ 2012-03-20 14:56 UTC (permalink / raw)
To: Richard Henderson; +Cc: GCC Patches
On 01/02/12 13:23, Richard Earnshaw wrote:
> On 31/01/12 05:15, Richard Henderson wrote:
>> Despite how trivial this is, I assume this must wait for stage1.
>> Ok?
>>
>>
>> r~
>>
>>
>> * longlong.h [arm] (umul_ppmm): Use umull.
>> [arm] (count_trailing_zeros): Use __builtin_ctz.
>
> armv3m also has the widening multiply operation (it's what the M stands
> for).
>
> Otherwise ok for stage1
>
And it's a good job we did. I've just noticed that it has broken thumb1
builds of libgcc: as the disassembly below shows, __ctzsi2 now simply calls itself.
00000000 <__ctzsi2>:
0: b508 push {r3, lr}
2: f7ff fffe bl 0 <__ctzsi2>
2: R_ARM_THM_CALL __ctzsi2
6: bc08 pop {r3}
8: bc02 pop {r1}
a: 4708 bx r1
R.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-03-20 14:56 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31 5:15 [arm] Improve longlong.h umul_ppmm, count_trailing_zeros Richard Henderson
2012-02-01 13:24 ` Richard Earnshaw
2012-03-20 14:56 ` Richard Earnshaw
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).