[PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
@ 2016-05-05 21:37 Jim Wilson
  2016-05-06 14:29 ` Kyrill Tkachov
  0 siblings, 1 reply; 3+ messages in thread
From: Jim Wilson @ 2016-05-05 21:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1173 bytes --]

For this simple testcase

double
sub (void)
{
  return 0.0;
}

Without the attached patch, an ARM compiler with neon support enabled, gives
     vldr.64 d0, .L2
With the attached patch, an ARM compiler with neon enabled, gives
     vmov.i64 d0, #0@ float
which is faster and smaller, as there is no load from a constant pool entry.

There are a few ways to implement this.  I added a neon enabled
attribute.  Another way to do this would be a new constraint, like Dg,
that tests for both neon and 0.

I don't see any mention of targets that only support single-float in
the ARM ARM, so it isn't obvious how to handle that.  I see no targets
that support both neon and single-float, but maybe I need to check for
that anyways?

Most of the patch involves renumbering constraints and matching
attributes.  The new alternative w/G must come before w/UvF or else we
still get a constant pool reference.  Otherwise the patch is pretty
small and simple.

We can do the same thing in the movdi pattern.  I haven't tried
writing that yet.

This patch was tested with a bootstrap and make check in an armhf
schroot on an xgene box.  There were no regressions.

OK to check in?

Jim

[-- Attachment #2: arm-vmov-i64.patch --]
[-- Type: text/x-patch, Size: 5515 bytes --]

	* config/arm/arm.md: (arch): Add neon.
	(arch_enabled): Return yes for arch neon when TARGET_NEON.
	* config/arm/vfp.md (movdf_vfp): Add w/G as alternative 3.  Add
	neon_move as type for alt 3.  Add arch attr enabling alt 3 for neon.
	Emit vmov.i64 for alt 3.  Renumber alternatives 3 to 8.  Adjust
	attributes for alt renumbering.  Mark alt 3 as non-predicable.
	(thumb2_movdf_vfp): Likewise.

Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 235793)
+++ config/arm/arm.md	(working copy)
@@ -121,7 +121,7 @@
 ; arm_arch6.  "v6t2" for Thumb-2 with arm_arch6.  This attribute is
 ; used to compute attribute "enabled", use type "any" to enable an
 ; alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
   (const_string "any"))
 
 (define_attr "arch_enabled" "no,yes"
@@ -177,6 +177,10 @@
 	 (and (eq_attr "arch" "armv6_or_vfpv3")
 	      (match_test "arm_arch6 || TARGET_VFP3"))
 	 (const_string "yes")
+
+	 (and (eq_attr "arch" "neon")
+	      (match_test "TARGET_NEON"))
+	 (const_string "yes")
 	]
 
 	(const_string "no")))
Index: config/arm/vfp.md
===================================================================
--- config/arm/vfp.md	(revision 235793)
+++ config/arm/vfp.md	(working copy)
@@ -394,8 +394,8 @@
 ;; DFmode moves
 
 (define_insn "*movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r, m,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r, m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -410,16 +410,18 @@
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
         return \"vmov%?.f64\\t%P0, %1\";
-      case 3: case 4:
+      case 3:
+	return \"vmov.i64\\t%P0, #0@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 5: case 6:
+      case 6: case 7:
 	return output_move_double (operands, true, NULL);
-      case 7:
+      case 8:
 	if (TARGET_VFP_SINGLE)
 	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
 	  return \"vmov%?.f64\\t%P0, %P1\";
-      case 8:
+      case 9:
         return \"#\";
       default:
 	gcc_unreachable ();
@@ -426,23 +428,24 @@
       }
     }
   "
-  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
                      load2,store2,ffarithd,multiple")
-   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
 				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "predicable" "yes")
-   (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
+   (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+   (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 (define_insn "*thumb2_movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r ,m,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w, mF,r, w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -457,11 +460,13 @@
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
 	return \"vmov%?.f64\\t%P0, %1\";
-      case 3: case 4:
+      case 3:
+	return \"vmov.i64\\t%P0, #0@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 5: case 6: case 8:
+      case 6: case 7: case 9:
 	return output_move_double (operands, true, NULL);
-      case 7:
+      case 8:
 	if (TARGET_VFP_SINGLE)
 	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
@@ -471,17 +476,18 @@
       }
     }
   "
-  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
                      f_stored,load2,store2,ffarithd,multiple")
-   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
 				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+   (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
  2016-05-05 21:37 [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled Jim Wilson
@ 2016-05-06 14:29 ` Kyrill Tkachov
  2016-05-07 23:12   ` Jim Wilson
  0 siblings, 1 reply; 3+ messages in thread
From: Kyrill Tkachov @ 2016-05-06 14:29 UTC (permalink / raw)
  To: Jim Wilson, gcc-patches

Hi Jim,

On 05/05/16 22:37, Jim Wilson wrote:
> For this simple testcase
>
> double
> sub (void)
> {
>    return 0.0;
> }
>
> Without the attached patch, an ARM compiler with neon support enabled, gives
>       vldr.64 d0, .L2
> With the attached patch, an ARM compiler with neon enabled, gives
>       vmov.i64 d0, #0@ float
> which is faster and smaller, as there is no load from a constant pool entry.
>
> There are a few ways to implement this.  I added a neon enabled
> attribute.  Another way to do this would be a new constraint, like Dg,
> that tests for both neon and 0.

Good idea.

> I don't see any mention of targets that only support single-float in
> the ARM ARM, so it isn't obvious how to handle that.  I see no targets
> that support both neon and single-float, but maybe I need to check for
> that anyways?
I don't think we have any.

I think adding a gcc_assert (TARGET_VFP_DOUBLE); to the
alternative you're adding would be the way to go.
We already have case 2 in the *movdf_vfp pattern that does that.

> Most of the patch involves renumbering constraints and matching
> attributes.  The new alternative w/G must come before w/UvF or else we
> still get a constant pool reference.  Otherwise the patch is pretty
> small and simple.
>
> We can do the same thing in the movdi pattern.  I haven't tried
> writing that yet.
>
> This patch was tested with a bootstrap and make check in an armhf
> schroot on an xgene box.  There were no regressions.

Since you're modifying the both the ARM and Thumb2 pattern
can you please do two bootstrap and tests, one with --with-mode=arm
and one with --with-mode=thumb.

> OK to check in?

Ok after adding the assert mentioned above, the arm/thumb testing and fixing
a minor nit below...


@@ -410,16 +410,18 @@
        case 2:
  	gcc_assert (TARGET_VFP_DOUBLE);
          return \"vmov%?.f64\\t%P0, %1\";
-      case 3: case 4:
+      case 3:
+	return \"vmov.i64\\t%P0, #0@ float\";
+      case 4: case 5:


Please add a tab before the "@float" comment i.e. "\\t%@ float".

Thanks for working on this,
Kyrill

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled
  2016-05-06 14:29 ` Kyrill Tkachov
@ 2016-05-07 23:12   ` Jim Wilson
  0 siblings, 0 replies; 3+ messages in thread
From: Jim Wilson @ 2016-05-07 23:12 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 601 bytes --]

On Fri, May 6, 2016 at 7:29 AM, Kyrill Tkachov
<kyrylo.tkachov@foss.arm.com> wrote:
> Since you're modifying the both the ARM and Thumb2 pattern
> can you please do two bootstrap and tests, one with --with-mode=arm
> and one with --with-mode=thumb.

> Ok after adding the assert mentioned above, the arm/thumb testing and fixing
> a minor nit below...
>
> Please add a tab before the "@float" comment i.e. "\\t%@ float".

I updated the patch as requested, and remembered to test both arm and
thumb mode this time, both passed without regression.  I checked in
the patch.  Revised patch attached.

Jim

[-- Attachment #2: arm-vmov-i64.patch.2 --]
[-- Type: application/octet-stream, Size: 5591 bytes --]

	* config/arm/arm.md: (arch): Add neon.
	(arch_enabled): Return yes for arch neon when TARGET_NEON.
	* config/arm/vfp.md (movdf_vfp): Add w/G as alternative 3.  Add
	neon_move as type for alt 3.  Add arch attr enabling alt 3 for neon.
	Emit vmov.i64 for alt 3.  Renumber alternatives 3 to 8.  Adjust
	attributes for alt renumbering.  Mark alt 3 as non-predicable.
	(thumb2_movdf_vfp): Likewise.

Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 235793)
+++ config/arm/arm.md	(working copy)
@@ -121,7 +121,7 @@
 ; arm_arch6.  "v6t2" for Thumb-2 with arm_arch6.  This attribute is
 ; used to compute attribute "enabled", use type "any" to enable an
 ; alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
   (const_string "any"))
 
 (define_attr "arch_enabled" "no,yes"
@@ -177,6 +177,10 @@
 	 (and (eq_attr "arch" "armv6_or_vfpv3")
 	      (match_test "arm_arch6 || TARGET_VFP3"))
 	 (const_string "yes")
+
+	 (and (eq_attr "arch" "neon")
+	      (match_test "TARGET_NEON"))
+	 (const_string "yes")
 	]
 
 	(const_string "no")))
Index: config/arm/vfp.md
===================================================================
--- config/arm/vfp.md	(revision 235793)
+++ config/arm/vfp.md	(working copy)
@@ -394,8 +394,8 @@
 ;; DFmode moves
 
 (define_insn "*movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r, m,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r, m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -410,16 +410,19 @@
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
         return \"vmov%?.f64\\t%P0, %1\";
-      case 3: case 4:
+      case 3:
+	gcc_assert (TARGET_VFP_DOUBLE);
+	return \"vmov.i64\\t%P0, #0\\t%@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 5: case 6:
+      case 6: case 7:
 	return output_move_double (operands, true, NULL);
-      case 7:
+      case 8:
 	if (TARGET_VFP_SINGLE)
 	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
 	  return \"vmov%?.f64\\t%P0, %P1\";
-      case 8:
+      case 9:
         return \"#\";
       default:
 	gcc_unreachable ();
@@ -426,23 +429,24 @@
       }
     }
   "
-  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
                      load2,store2,ffarithd,multiple")
-   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
 				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "predicable" "yes")
-   (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
+   (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+   (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 (define_insn "*thumb2_movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r ,m,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w, mF,r, w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -457,11 +461,14 @@
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
 	return \"vmov%?.f64\\t%P0, %1\";
-      case 3: case 4:
+      case 3:
+	gcc_assert (TARGET_VFP_DOUBLE);
+	return \"vmov.i64\\t%P0, #0\\t%@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 5: case 6: case 8:
+      case 6: case 7: case 9:
 	return output_move_double (operands, true, NULL);
-      case 7:
+      case 8:
 	if (TARGET_VFP_SINGLE)
 	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
@@ -471,17 +478,18 @@
       }
     }
   "
-  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
                      f_stored,load2,store2,ffarithd,multiple")
-   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
 				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+   (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-05-07 23:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-05 21:37 [PATCH, ARM] use vmov.i64 to load 0 into FP reg if neon enabled Jim Wilson
2016-05-06 14:29 ` Kyrill Tkachov
2016-05-07 23:12   ` Jim Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).