public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [rs6000] Avoid rotates of floating-point modes
@ 2017-07-12 16:33 Richard Sandiford
  2017-07-13 17:39 ` Segher Boessenkool
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2017-07-12 16:33 UTC (permalink / raw)
  To: gcc-patches

The little-endian VSX code uses rotates to swap the two 64-bit halves of
128-bit scalar modes.  This is fine for TImode and V1TImode, but it
isn't really valid to use RTL rotates on floating-point modes like
KFmode and TFmode, and doing that triggered an assert added by the
SVE series.  This patch uses bit-casts to V1TImode instead.

Tested on powerpc64le-linux-gnu.  OK to install?

Richard


2017-07-12  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
	* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
	(rs6000_emit_le_vsx_permute): ...this.  Take the destination as input.
	Emit instructions rather than returning an expression.  Handle TFmode
	and KFmode by casting to V1TImode.
	(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
	(rs6000_emit_le_vsx_store): Likewise.
	* config/rs6000/vsx.md (VSX_LE_128I): New iterator.
	(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
	(*vsx_le_undo_permute_<mode>): Likewise.
	(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
	emit the split sequence.
	(*vsx_le_perm_store_<mode>): Likewise.

Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	2017-06-30 12:50:38.886633045 +0100
+++ gcc/config/rs6000/rs6000-protos.h	2017-07-12 16:30:38.728631839 +0100
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
 extern rtx create_TOC_reference (rtx, rtx);
 extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	2017-07-08 11:37:45.740795846 +0100
+++ gcc/config/rs6000/rs6000.c	2017-07-12 16:30:38.732631678 +0100
@@ -10503,17 +10503,24 @@ rs6000_const_vec (machine_mode mode)
 
 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
    for a VSX load or store operation.  */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
 {
   /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
      128-bit integers if they are allowed in VSX registers.  */
-  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
-    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+  if (FLOAT128_VECTOR_P (mode))
+    {
+      dest = gen_lowpart (V1TImode, dest);
+      source = gen_lowpart (V1TImode, source);
+      mode = V1TImode;
+    }
+  if (mode == TImode || mode == V1TImode)
+    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+						  GEN_INT (64))));
   else
     {
       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
-      return gen_rtx_VEC_SELECT (mode, source, par);
+      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
     }
 }
 
@@ -10523,8 +10530,6 @@ rs6000_gen_le_vsx_permute (rtx source, m
 void
 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_mem, permute_reg;
-
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10539,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
       source = adjust_address (source, V2DImode, 0);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
-  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
-  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_mem));
-  emit_insn (gen_rtx_SET (dest, permute_reg));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,8 +10550,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
 void
 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_src, permute_tmp;
-
   /* This should never be called during or after reload, because it does
      not re-permute the source register.  It is intended only for use
      during expand.  */
@@ -10563,11 +10564,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx
       source = gen_lowpart (V2DImode, source);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
-  permute_src = rs6000_gen_le_vsx_permute (source, mode);
-  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_src));
-  emit_insn (gen_rtx_SET (dest, permute_tmp));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a sequence representing a little-endian VSX load or store,
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	2017-06-30 12:50:38.889632907 +0100
+++ gcc/config/rs6000/vsx.md	2017-07-12 16:30:38.734631598 +0100
@@ -37,6 +37,10 @@ (define_mode_iterator VSX_LE_128 [(KF
 				  (TI	"TARGET_VSX_TIMODE")
 				  V1TI])
 
+;; Same, but with just the integer modes.
+(define_mode_iterator VSX_LE_128I [(TI	"TARGET_VSX_TIMODE")
+				   V1TI])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -750,9 +754,9 @@ (define_split
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
-	(rotate:VSX_LE_128
-	 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+  [(set (match_operand:VSX_LE_128I 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+	(rotate:VSX_LE_128I
+	 (match_operand:VSX_LE_128I 1 "input_operand" "<VSa>,Z,<VSa>")
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -763,10 +767,10 @@ (define_insn "*vsx_le_permute_<mode>"
    (set_attr "type" "vecperm,vecload,vecstore")])
 
 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
-	(rotate:VSX_LE_128
-	 (rotate:VSX_LE_128
-	  (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+  [(set (match_operand:VSX_LE_128I 0 "vsx_register_operand" "=<VSa>,<VSa>")
+	(rotate:VSX_LE_128I
+	 (rotate:VSX_LE_128I
+	  (match_operand:VSX_LE_128I 1 "vsx_register_operand" "0,<VSa>")
 	  (const_int 64))
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -791,16 +795,15 @@ (define_insn_and_split "*vsx_le_perm_loa
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
   "
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 }
   "
   [(set_attr "type" "vecload")
@@ -818,15 +821,14 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 })
 
 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -850,16 +852,13 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))]
-  "")
+  [(const_int 0)]
+{
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  DONE;
+})
 
 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [rs6000] Avoid rotates of floating-point modes
  2017-07-12 16:33 [rs6000] Avoid rotates of floating-point modes Richard Sandiford
@ 2017-07-13 17:39 ` Segher Boessenkool
  2017-07-25 15:10   ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Segher Boessenkool @ 2017-07-13 17:39 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

Hi Richard,

On Wed, Jul 12, 2017 at 05:33:42PM +0100, Richard Sandiford wrote:
> The little-endian VSX code uses rotates to swap the two 64-bit halves of
> 128-bit scalar modes.  This is fine for TImode and V1TImode, but it
> isn't really valid to use RTL rotates on floating-point modes like
> KFmode and TFmode, and doing that triggered an assert added by the
> SVE series.  This patch uses bit-casts to V1TImode instead.
> 
> Tested on powerpc64le-linux-gnu.  OK to install?


> +void
> +rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
>  {
>    /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
>       128-bit integers if they are allowed in VSX registers.  */
> -  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
> -    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
> +  if (FLOAT128_VECTOR_P (mode))
> +    {
> +      dest = gen_lowpart (V1TImode, dest);
> +      source = gen_lowpart (V1TImode, source);
> +      mode = V1TImode;
> +    }

Add an empty line here?  And maybe a comment.

> +  if (mode == TImode || mode == V1TImode)
> +    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
> +						  GEN_INT (64))));
>    else
>      {
>        rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
> -      return gen_rtx_VEC_SELECT (mode, source, par);
> +      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
>      }
>  }

> --- gcc/config/rs6000/vsx.md	2017-06-30 12:50:38.889632907 +0100
> +++ gcc/config/rs6000/vsx.md	2017-07-12 16:30:38.734631598 +0100
> @@ -37,6 +37,10 @@ (define_mode_iterator VSX_LE_128 [(KF
>  				  (TI	"TARGET_VSX_TIMODE")
>  				  V1TI])
>  
> +;; Same, but with just the integer modes.
> +(define_mode_iterator VSX_LE_128I [(TI	"TARGET_VSX_TIMODE")
> +				   V1TI])

I don't like that name much.  The difference between VSX_LE_128 and
VSX_LE_128I is easy to overlook (and what _is_ the difference?  "I"
means "integer" I guess?).  The "LE" in the name has no real meaning
(it is used for LE, sure, but that doesn't matter for the iterator).
Maybe just VSX_TI?  Or is that too short?

Other than that, looks fine.  Thank you for the patch!

Does this need backports?


Segher

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [rs6000] Avoid rotates of floating-point modes
  2017-07-13 17:39 ` Segher Boessenkool
@ 2017-07-25 15:10   ` Richard Sandiford
  2017-07-26 18:02     ` Segher Boessenkool
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2017-07-25 15:10 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches

Segher Boessenkool <segher@kernel.crashing.org> writes:
> Hi Richard,
>
> On Wed, Jul 12, 2017 at 05:33:42PM +0100, Richard Sandiford wrote:
>> The little-endian VSX code uses rotates to swap the two 64-bit halves of
>> 128-bit scalar modes.  This is fine for TImode and V1TImode, but it
>> isn't really valid to use RTL rotates on floating-point modes like
>> KFmode and TFmode, and doing that triggered an assert added by the
>> SVE series.  This patch uses bit-casts to V1TImode instead.
>> 
>> Tested on powerpc64le-linux-gnu.  OK to install?
>
>
>> +void
>> +rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
>>  {
>>    /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
>>       128-bit integers if they are allowed in VSX registers.  */
>> -  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
>> -    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
>> +  if (FLOAT128_VECTOR_P (mode))
>> +    {
>> +      dest = gen_lowpart (V1TImode, dest);
>> +      source = gen_lowpart (V1TImode, source);
>> +      mode = V1TImode;
>> +    }
>
> Add an empty line here?  And maybe a comment.
>
>> +  if (mode == TImode || mode == V1TImode)
>> +    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
>> +						  GEN_INT (64))));
>>    else
>>      {
>>        rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
>> -      return gen_rtx_VEC_SELECT (mode, source, par);
>> +      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
>>      }
>>  }
>
>> --- gcc/config/rs6000/vsx.md	2017-06-30 12:50:38.889632907 +0100
>> +++ gcc/config/rs6000/vsx.md	2017-07-12 16:30:38.734631598 +0100
>> @@ -37,6 +37,10 @@ (define_mode_iterator VSX_LE_128 [(KF
>>  				  (TI	"TARGET_VSX_TIMODE")
>>  				  V1TI])
>>  
>> +;; Same, but with just the integer modes.
>> +(define_mode_iterator VSX_LE_128I [(TI	"TARGET_VSX_TIMODE")
>> +				   V1TI])
>
> I don't like that name much.  The difference between VSX_LE_128 and
> VSX_LE_128I is easy to overlook (and what _is_ the difference?  "I"
> means "integer" I guess?).  The "LE" in the name has no real meaning
> (it is used for LE, sure, but that doesn't matter for the iterator).
> Maybe just VSX_TI?  Or is that too short.
>
> Other than that, looks fine.  Thank you for the patch!

OK, how does this look?  Tested in the same way as before.

> Does this need backports?

Not sure, but probably not.  I don't know of any specific code that
would complain at the moment (but there again I haven't looked that hard).

Thanks,
Richard


2017-07-25  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
	* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
	(rs6000_emit_le_vsx_permute): ...this.  Take the destination as input.
	Emit instructions rather than returning an expression.  Handle TFmode
	and KFmode by casting to V1TImode.
	(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
	(rs6000_emit_le_vsx_store): Likewise.
	* config/rs6000/vsx.md (VSX_TI): New iterator.
	(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
	(*vsx_le_undo_permute_<mode>): Likewise.
	(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
	emit the split sequence.
	(*vsx_le_perm_store_<mode>): Likewise.

Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/rs6000-protos.h	2017-07-25 11:04:20.314991769 +0100
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
 extern rtx create_TOC_reference (rtx, rtx);
 extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/rs6000.c	2017-07-25 11:14:27.692739547 +0100
@@ -10503,17 +10503,28 @@ rs6000_const_vec (machine_mode mode)
 
 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
    for a VSX load or store operation.  */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
 {
-  /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
-     128-bit integers if they are allowed in VSX registers.  */
-  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
-    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+  /* Scalar permutations are easier to express in integer modes rather than
+     floating-point modes, so cast them here.  We use V1TImode instead
+     of TImode to ensure that the values don't go through GPRs.  */
+  if (FLOAT128_VECTOR_P (mode))
+    {
+      dest = gen_lowpart (V1TImode, dest);
+      source = gen_lowpart (V1TImode, source);
+      mode = V1TImode;
+    }
+
+  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+     scalar.  */
+  if (mode == TImode || mode == V1TImode)
+    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+						  GEN_INT (64))));
   else
     {
       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
-      return gen_rtx_VEC_SELECT (mode, source, par);
+      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
     }
 }
 
@@ -10523,8 +10534,6 @@ rs6000_gen_le_vsx_permute (rtx source, m
 void
 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_mem, permute_reg;
-
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10543,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
       source = adjust_address (source, V2DImode, 0);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
-  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
-  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_mem));
-  emit_insn (gen_rtx_SET (dest, permute_reg));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,8 +10554,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
 void
 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_src, permute_tmp;
-
   /* This should never be called during or after reload, because it does
      not re-permute the source register.  It is intended only for use
      during expand.  */
@@ -10563,11 +10568,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx
       source = gen_lowpart (V2DImode, source);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
-  permute_src = rs6000_gen_le_vsx_permute (source, mode);
-  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_src));
-  emit_insn (gen_rtx_SET (dest, permute_tmp));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a sequence representing a little-endian VSX load or store,
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/vsx.md	2017-07-25 11:08:54.160528532 +0100
@@ -37,6 +37,9 @@ (define_mode_iterator VSX_LE_128 [(KF
 				  (TI	"TARGET_VSX_TIMODE")
 				  V1TI])
 
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -750,9 +753,9 @@ (define_split
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
-	(rotate:VSX_LE_128
-	 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+	(rotate:VSX_TI
+	 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -763,10 +766,10 @@ (define_insn "*vsx_le_permute_<mode>"
    (set_attr "type" "vecperm,vecload,vecstore")])
 
 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
-	(rotate:VSX_LE_128
-	 (rotate:VSX_LE_128
-	  (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+	(rotate:VSX_TI
+	 (rotate:VSX_TI
+	  (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
 	  (const_int 64))
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -791,16 +794,15 @@ (define_insn_and_split "*vsx_le_perm_loa
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
   "
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 }
   "
   [(set_attr "type" "vecload")
@@ -818,15 +820,14 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 })
 
 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -850,16 +851,13 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))]
-  "")
+  [(const_int 0)]
+{
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  DONE;
+})
 
 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [rs6000] Avoid rotates of floating-point modes
  2017-07-25 15:10   ` Richard Sandiford
@ 2017-07-26 18:02     ` Segher Boessenkool
  0 siblings, 0 replies; 4+ messages in thread
From: Segher Boessenkool @ 2017-07-26 18:02 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

Hi!

On Tue, Jul 25, 2017 at 04:10:19PM +0100, Richard Sandiford wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> --- gcc/config/rs6000/rs6000.c	2017-07-13 09:25:13.909213921 +0100
> +++ gcc/config/rs6000/rs6000.c	2017-07-25 11:14:27.692739547 +0100
> @@ -10503,17 +10503,28 @@ rs6000_const_vec (machine_mode mode)
>  
>  /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
>     for a VSX load or store operation.  */
> -rtx
> -rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
> +void
> +rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)

Please update the comment.  With that, okay for trunk.  Thanks!


Segher

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-07-26 18:02 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-12 16:33 [rs6000] Avoid rotates of floating-point modes Richard Sandiford
2017-07-13 17:39 ` Segher Boessenkool
2017-07-25 15:10   ` Richard Sandiford
2017-07-26 18:02     ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).