public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [ARM] Optimise handling of neon_vget_high/low
@ 2011-09-14 13:51 Richard Sandiford
  2011-09-23 10:00 ` Ramana Radhakrishnan
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2011-09-14 13:51 UTC (permalink / raw)
  To: gcc-patches

neon_vget_high and neon_vget_low extract one half of a vector.
The patterns look like:

(define_insn "neon_vget_highv16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                         (parallel [(const_int 8) (const_int 9)
			            (const_int 10) (const_int 11)
				    (const_int 12) (const_int 13)
				    (const_int 14) (const_int 15)])))]
  "TARGET_NEON"
{
  int dest = REGNO (operands[0]);
  int src = REGNO (operands[1]);

  if (dest != src + 2)
    return "vmov\t%P0, %f1";
  else
    return "";
}
  [(set_attr "neon_type" "neon_bp_simple")]
)

But there's nothing here to tell the register allocator what's expected
of it, so we do often get the move.  The patch below makes the patterns
expand to normal subreg moves instead.

Unfortunately, when I first tried this, I ran across some bugs in
simplify-rtx.c.  They should be fixed now.  Of course, I can't
guarantee that there are other similar bugs elsewhere, but I'll
try to fix any that crop up.

The new patterns preserve the current treatment on big-endian targets.
Namely, the "low" half is always in the lower-numbered registers
(subreg byte offset 0).

Tested on arm-linux-gnueabi.  OK to install?

Richard


gcc/
	* config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
	(neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
	(neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
	(neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
	that produce subreg moves.  Define using VQX iterators.

Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md	2011-09-13 13:33:29.000000000 +0100
+++ gcc/config/arm/neon.md	2011-09-13 16:21:23.189304498 +0100
@@ -2969,183 +2969,27 @@ (define_insn "neon_vcombine<mode>"
    (set_attr "neon_type" "neon_bp_simple")]
 )
 
-(define_insn "neon_vget_highv16qi"
-  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
-	(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
-                         (parallel [(const_int 8) (const_int 9)
-			            (const_int 10) (const_int 11)
-				    (const_int 12) (const_int 13)
-				    (const_int 14) (const_int 15)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv8hi"
-  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
-	(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 4) (const_int 5)
-			            (const_int 6) (const_int 7)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4si"
-  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
-	(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4sf"
-  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
-	(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
-	                 (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv2di"
-  [(set (match_operand:DI 0 "s_register_operand" "=w")
-	(vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
-	               (parallel [(const_int 1)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv16qi"
-  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
-	(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
-                         (parallel [(const_int 0) (const_int 1)
-			            (const_int 2) (const_int 3)
-				    (const_int 4) (const_int 5)
-				    (const_int 6) (const_int 7)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv8hi"
-  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
-	(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)
-			            (const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4si"
-  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
-	(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4sf"
-  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
-	(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)])))]
+(define_expand "neon_vget_high<mode>"
+  [(match_operand:<V_HALF> 0 "s_register_operand")
+   (match_operand:VQX 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
+  emit_move_insn (operands[0],
+		  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+				       GET_MODE_SIZE (<V_HALF>mode)));
+  DONE;
+})
 
-(define_insn "neon_vget_lowv2di"
-  [(set (match_operand:DI 0 "s_register_operand" "=w")
-	(vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
-	               (parallel [(const_int 0)])))]
+(define_expand "neon_vget_low<mode>"
+  [(match_operand:<V_HALF> 0 "s_register_operand")
+   (match_operand:VQX 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
+  emit_move_insn (operands[0],
+		  simplify_gen_subreg (<V_HALF>mode, operands[1],
+				       <MODE>mode, 0));
+  DONE;
+})
 
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ARM] Optimise handling of neon_vget_high/low
  2011-09-14 13:51 [ARM] Optimise handling of neon_vget_high/low Richard Sandiford
@ 2011-09-23 10:00 ` Ramana Radhakrishnan
  2011-09-28  9:11   ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Ramana Radhakrishnan @ 2011-09-23 10:00 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 14 September 2011 13:30, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> neon_vget_high and neon_vget_low extract one half of a vector.
> The patterns look like:
>
> (define_insn "neon_vget_highv16qi"
>  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
>        (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
>                         (parallel [(const_int 8) (const_int 9)
>                                    (const_int 10) (const_int 11)
>                                    (const_int 12) (const_int 13)
>                                    (const_int 14) (const_int 15)])))]
>  "TARGET_NEON"
> {
>  int dest = REGNO (operands[0]);
>  int src = REGNO (operands[1]);
>
>  if (dest != src + 2)
>    return "vmov\t%P0, %f1";
>  else
>    return "";
> }
>  [(set_attr "neon_type" "neon_bp_simple")]
> )
>
> But there's nothing here to tell the register allocator what's expected
> of it, so we do often get the move.  The patch below makes the patterns
> expand to normal subreg moves instead.
>
> Unfortunately, when I first tried this, I ran across some bugs in
> simplify-rtx.c.  They should be fixed now.  Of course, I can't
> guarantee that there are other similar bugs elsewhere, but I'll
> try to fix any that crop up.
>
> The new patterns preserve the current treatment on big-endian targets.
> Namely, the "low" half is always in the lower-numbered registers
> (subreg byte offset 0).
>
> Tested on arm-linux-gnueabi.  OK to install?


This is OK . Please watch out for any fallout that comes as a result
of this . It would be useful to do some big endian testing at some
point but I'm not going to let that hold up this patch.

While you are here can you look at the quad_halves_<code>v4si etc.
patterns neon_move_lo_quad_<mode> and neon_move_hi_quad_<mode>
patterns which seem to have the same problem ? And then I remembered
this ancient FIXME:

> ; FIXME: We wouldn't need the following insns if we could write subregs of
> ; vector registers. Make an attempt at removing unnecessary moves, though
> ; we're really at the mercy of the register allocator.
>


But that can be a follow-up patch.

And it's nice to be able to remove such FIXME's :)


cheers
Ramana

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ARM] Optimise handling of neon_vget_high/low
  2011-09-23 10:00 ` Ramana Radhakrishnan
@ 2011-09-28  9:11   ` Richard Sandiford
  2011-09-28 12:42     ` Ramana Radhakrishnan
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2011-09-28  9:11 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches

Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> writes:
> On 14 September 2011 13:30, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
>> neon_vget_high and neon_vget_low extract one half of a vector.
>> The patterns look like:
>>
>> (define_insn "neon_vget_highv16qi"
>>  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
>>        (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
>>                         (parallel [(const_int 8) (const_int 9)
>>                                    (const_int 10) (const_int 11)
>>                                    (const_int 12) (const_int 13)
>>                                    (const_int 14) (const_int 15)])))]
>>  "TARGET_NEON"
>> {
>>  int dest = REGNO (operands[0]);
>>  int src = REGNO (operands[1]);
>>
>>  if (dest != src + 2)
>>    return "vmov\t%P0, %f1";
>>  else
>>    return "";
>> }
>>  [(set_attr "neon_type" "neon_bp_simple")]
>> )
>>
>> But there's nothing here to tell the register allocator what's expected
>> of it, so we do often get the move.  The patch below makes the patterns
>> expand to normal subreg moves instead.
>>
>> Unfortunately, when I first tried this, I ran across some bugs in
>> simplify-rtx.c.  They should be fixed now.  Of course, I can't
>> guarantee that there are other similar bugs elsewhere, but I'll
>> try to fix any that crop up.
>>
>> The new patterns preserve the current treatment on big-endian targets.
>> Namely, the "low" half is always in the lower-numbered registers
>> (subreg byte offset 0).
>>
>> Tested on arm-linux-gnueabi.  OK to install?
>
>
> This is OK . Please watch out for any fallout that comes as a result
> of this . It would be useful to do some big endian testing at some
> point but I'm not going to let that hold up this patch.
>
> While you are here can you look at the quad_halves_<code>v4si etc.
> patterns neon_move_lo_quad_<mode> and neon_move_hi_quad_<mode>
> patterns which seem to have the same problem ?

Ah, yeah, thanks for the pointer.

Tested on arm-linux-gnueabi, and on benchmarks which should (and did)
benefit from it.  OK to install?

Richard


gcc/
	* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
	(neon_move_hi_quad_<mode>): Likewise.
	(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.

Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md	2011-09-27 09:27:18.000000000 +0100
+++ gcc/config/arm/neon.md	2011-09-27 09:45:26.008275971 +0100
@@ -1251,66 +1251,14 @@ (define_insn "quad_halves_<code>v16qi"
                     (const_string "neon_int_1") (const_string "neon_int_5")))]
 )
 
-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")
-          (vec_select:<V_HALF> 
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%e0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (vec_select:<V_HALF>
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_low" ""))
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-	   
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%f0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
 (define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT (i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+				       GET_MODE_SIZE (<V_HALF>mode)),
+		  operands[1]);
   DONE;
 })
 
@@ -1319,16 +1267,9 @@ (define_expand "move_lo_quad_<mode>"
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+				       <MODE>mode, 0),
+		  operands[1]);
   DONE;
 })
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ARM] Optimise handling of neon_vget_high/low
  2011-09-28  9:11   ` Richard Sandiford
@ 2011-09-28 12:42     ` Ramana Radhakrishnan
  0 siblings, 0 replies; 4+ messages in thread
From: Ramana Radhakrishnan @ 2011-09-28 12:42 UTC (permalink / raw)
  To: Ramana Radhakrishnan, gcc-patches, richard.sandiford

>
> Tested on arm-linux-gnueabi, and on benchmarks which should (and did)
> benefit from it.  OK to install?

OK.

Ramana

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-09-28 11:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-14 13:51 [ARM] Optimise handling of neon_vget_high/low Richard Sandiford
2011-09-23 10:00 ` Ramana Radhakrishnan
2011-09-28  9:11   ` Richard Sandiford
2011-09-28 12:42     ` Ramana Radhakrishnan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).