* [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
@ 2016-05-05 21:37 Jim Wilson
2016-05-06 14:29 ` Kyrill Tkachov
0 siblings, 1 reply; 3+ messages in thread
From: Jim Wilson @ 2016-05-05 21:37 UTC (permalink / raw)
To: gcc-patches; +Cc: Jim Wilson
[-- Attachment #1: Type: text/plain, Size: 1173 bytes --]
For this simple testcase
double
sub (void)
{
return 0.0;
}
Without the attached patch, an ARM compiler with neon support enabled, gives
vldr.64 d0, .L2
With the attached patch, an ARM compiler with neon enabled, gives
vmov.i64 d0, #0@ float
which is faster and smaller, as there is no load from a constant pool entry.
There are a few ways to implement this. I added a neon enabled
attribute. Another way to do this would be a new constraint, like Dg,
that tests for both neon and 0.
I don't see any mention of targets that only support single-float in
the ARM ARM (Architecture Reference Manual), so it isn't obvious how to
handle that. I see no targets that support both neon and single-float,
but maybe I need to check for that anyway?
Most of the patch involves renumbering constraints and matching
attributes. The new alternative w/G must come before w/UvF or else we
still get a constant pool reference. Otherwise the patch is pretty
small and simple.
We can do the same thing in the movdi pattern. I haven't tried
writing that yet.
This patch was tested with a bootstrap and make check in an armhf
schroot on an xgene box. There were no regressions.
OK to check in?
Jim
[-- Attachment #2: arm-vmov-i64.patch --]
[-- Type: text/x-patch, Size: 5515 bytes --]
* config/arm/arm.md (arch): Add neon.
(arch_enabled): Return yes for arch neon when TARGET_NEON.
* config/arm/vfp.md (movdf_vfp): Add w/G as alternative 3. Add
neon_move as type for alt 3. Add arch attr enabling alt 3 for neon.
Emit vmov.i64 for alt 3. Renumber alternatives 3 to 8. Adjust
attributes for alt renumbering. Mark alt 3 as non-predicable.
(thumb2_movdf_vfp): Likewise.
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md (revision 235793)
+++ config/arm/arm.md (working copy)
@@ -121,7 +121,7 @@
; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is
; used to compute attribute "enabled", use type "any" to enable an
; alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -177,6 +177,10 @@
(and (eq_attr "arch" "armv6_or_vfpv3")
(match_test "arm_arch6 || TARGET_VFP3"))
(const_string "yes")
+
+ (and (eq_attr "arch" "neon")
+ (match_test "TARGET_NEON"))
+ (const_string "yes")
]
(const_string "no")))
Index: config/arm/vfp.md
===================================================================
--- config/arm/vfp.md (revision 235793)
+++ config/arm/vfp.md (working copy)
@@ -394,8 +394,8 @@
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -410,16 +410,18 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ return \"vmov.i64\\t%P0, #0@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6:
+ case 6: case 7:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
return \"vmov%?.f64\\t%P0, %P1\";
- case 8:
+ case 9:
return \"#\";
default:
gcc_unreachable ();
@@ -426,23 +428,24 @@
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
+ (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+ (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -457,11 +460,13 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ return \"vmov.i64\\t%P0, #0@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6: case 8:
+ case 6: case 7: case 9:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
@@ -471,17 +476,18 @@
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
f_stored,load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+ (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
2016-05-05 21:37 [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled Jim Wilson
@ 2016-05-06 14:29 ` Kyrill Tkachov
2016-05-07 23:12 ` Jim Wilson
0 siblings, 1 reply; 3+ messages in thread
From: Kyrill Tkachov @ 2016-05-06 14:29 UTC (permalink / raw)
To: Jim Wilson, gcc-patches
Hi Jim,
On 05/05/16 22:37, Jim Wilson wrote:
> For this simple testcase
>
> double
> sub (void)
> {
> return 0.0;
> }
>
> Without the attached patch, an ARM compiler with neon support enabled, gives
> vldr.64 d0, .L2
> With the attached patch, an ARM compiler with neon enabled, gives
> vmov.i64 d0, #0@ float
> which is faster and smaller, as there is no load from a constant pool entry.
>
> There are a few ways to implement this. I added a neon enabled
> attribute. Another way to do this would be a new constraint, like Dg,
> that tests for both neon and 0.
Good idea.
> I don't see any mention of targets that only support single-float in
> the ARM ARM, so it isn't obvious how to handle that. I see no targets
> that support both neon and single-float, but maybe I need to check for
> that anyways?
I don't think we have any.
I think adding a gcc_assert (TARGET_VFP_DOUBLE); to the
alternative you're adding would be the way to go.
We already have case 2 in the *movdf_vfp pattern that does that.
> Most of the patch involves renumbering constraints and matching
> attributes. The new alternative w/G must come before w/UvF or else we
> still get a constant pool reference. Otherwise the patch is pretty
> small and simple.
>
> We can do the same thing in the movdi pattern. I haven't tried
> writing that yet.
>
> This patch was tested with a bootstrap and make check in an armhf
> schroot on an xgene box. There were no regressions.
Since you're modifying both the ARM and Thumb2 patterns,
can you please do two bootstrap and test runs, one with --with-mode=arm
and one with --with-mode=thumb?
> OK to check in?
Ok after adding the assert mentioned above, the arm/thumb testing and fixing
a minor nit below...
@@ -410,16 +410,18 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ return \"vmov.i64\\t%P0, #0@ float\";
+ case 4: case 5:
Please add a tab before the "@ float" comment, i.e. "\\t%@ float".
Thanks for working on this,
Kyrill
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
2016-05-06 14:29 ` Kyrill Tkachov
@ 2016-05-07 23:12 ` Jim Wilson
0 siblings, 0 replies; 3+ messages in thread
From: Jim Wilson @ 2016-05-07 23:12 UTC (permalink / raw)
To: Kyrill Tkachov; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 601 bytes --]
On Fri, May 6, 2016 at 7:29 AM, Kyrill Tkachov
<kyrylo.tkachov@foss.arm.com> wrote:
> Since you're modifying the both the ARM and Thumb2 pattern
> can you please do two bootstrap and tests, one with --with-mode=arm
> and one with --with-mode=thumb.
> Ok after adding the assert mentioned above, the arm/thumb testing and fixing
> a minor nit below...
>
> Please add a tab before the "@float" comment i.e. "\\t%@ float".
I updated the patch as requested, and remembered to test both arm and
thumb mode this time; both passed without regressions. I checked in
the patch. Revised patch attached.
Jim
[-- Attachment #2: arm-vmov-i64.patch.2 --]
[-- Type: application/octet-stream, Size: 5591 bytes --]
* config/arm/arm.md (arch): Add neon.
(arch_enabled): Return yes for arch neon when TARGET_NEON.
* config/arm/vfp.md (movdf_vfp): Add w/G as alternative 3. Add
neon_move as type for alt 3. Add arch attr enabling alt 3 for neon.
Emit vmov.i64 for alt 3. Renumber alternatives 3 to 8. Adjust
attributes for alt renumbering. Mark alt 3 as non-predicable.
(thumb2_movdf_vfp): Likewise.
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md (revision 235793)
+++ config/arm/arm.md (working copy)
@@ -121,7 +121,7 @@
; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is
; used to compute attribute "enabled", use type "any" to enable an
; alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -177,6 +177,10 @@
(and (eq_attr "arch" "armv6_or_vfpv3")
(match_test "arm_arch6 || TARGET_VFP3"))
(const_string "yes")
+
+ (and (eq_attr "arch" "neon")
+ (match_test "TARGET_NEON"))
+ (const_string "yes")
]
(const_string "no")))
Index: config/arm/vfp.md
===================================================================
--- config/arm/vfp.md (revision 235793)
+++ config/arm/vfp.md (working copy)
@@ -394,8 +394,8 @@
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -410,16 +410,19 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov.i64\\t%P0, #0\\t%@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6:
+ case 6: case 7:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
return \"vmov%?.f64\\t%P0, %P1\";
- case 8:
+ case 9:
return \"#\";
default:
gcc_unreachable ();
@@ -426,23 +429,24 @@
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
+ (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+ (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -457,11 +461,14 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov.i64\\t%P0, #0\\t%@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6: case 8:
+ case 6: case 7: case 9:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
@@ -471,17 +478,18 @@
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
f_stored,load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+ (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-05-07 23:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-05 21:37 [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled Jim Wilson
2016-05-06 14:29 ` Kyrill Tkachov
2016-05-07 23:12 ` Jim Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).