public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Generate LE code for vec_lvsl and vec_lvsr that is compatible with BE code
@ 2014-09-29 22:26 Bill Schmidt
  2014-09-30 14:50 ` Segher Boessenkool
  0 siblings, 1 reply; 8+ messages in thread
From: Bill Schmidt @ 2014-09-29 22:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: dje.gcc

Hi,

Up till now we have not attempted to generate code for LE usage of
vec_lvsl and vec_lvsr that is compatible with expected BE usage.  The LE
code sequence corresponding to lvsl/vperm is not good, and we encourage
programmers to convert those sequences to use direct assignment and the
type system for unaligned loads.  However, the issue comes up frequently
enough that it seems best to provide this sequence together with a
warning message (in a previous patch submission) to avoid confusion.

The method used in this patch is to perform a byte-reversal of the
result of the lvsl/lvsr.  This is accomplished by loading the vector
char constant {0,1,...,15}, which will appear in the register from left
to right as {15,...,1,0}.  A vperm instruction (which uses BE element
ordering) is applied to the result of the lvsl/lvsr using the loaded
constant as the permute control vector.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


[gcc]

2014-09-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* altivec.md (altivec_lvsl): New define_expand.
	(altivec_lvsl_direct): Rename define_insn from altivec_lvsl.
	(altivec_lvsr): New define_expand.
	(altivec_lvsr_direct): Rename define_insn from altivec_lvsr.
	* rs6000.c (rs6000_expand_builtin): Change to use
	altivec_lvs[lr]_direct; remove commented-out code.

[gcc/testsuite]

2014-09-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/lvsl-lvsr.c: New test.


Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 215689)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -2297,7 +2297,32 @@
   "dststt %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "altivec_lvsl"
+(define_expand "altivec_lvsl"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+  "TARGET_ALTIVEC"
+  "
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1]));
+  else
+    {
+      int i;
+      rtx mask, perm[16], constv, vperm;
+      mask = gen_reg_rtx (V16QImode);
+      emit_insn (gen_altivec_lvsl_direct (mask, operands[1]));
+      for (i = 0; i < 16; ++i)
+        perm[i] = GEN_INT (i);
+      constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+      constv = force_reg (V16QImode, constv);
+      vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+                              UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+    }
+  DONE;
+}")
+
+(define_insn "altivec_lvsl_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
 		      UNSPEC_LVSL))]
@@ -2305,7 +2330,32 @@
   "lvsl %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_lvsr"
+(define_expand "altivec_lvsr"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+  "TARGET_ALTIVEC"
+  "
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1]));
+  else
+    {
+      int i;
+      rtx mask, perm[16], constv, vperm;
+      mask = gen_reg_rtx (V16QImode);
+      emit_insn (gen_altivec_lvsr_direct (mask, operands[1]));
+      for (i = 0; i < 16; ++i)
+        perm[i] = GEN_INT (i);
+      constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+      constv = force_reg (V16QImode, constv);
+      vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+                              UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+    }
+  DONE;
+}")
+
+(define_insn "altivec_lvsr_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
 		      UNSPEC_LVSR))]
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 215689)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -13898,8 +13898,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
     case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
     case ALTIVEC_BUILTIN_MASK_FOR_STORE:
       {
-	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
-		     : (int) CODE_FOR_altivec_lvsl);
+	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+		     : (int) CODE_FOR_altivec_lvsl_direct);
 	enum machine_mode tmode = insn_data[icode].operand[0].mode;
 	enum machine_mode mode = insn_data[icode].operand[1].mode;
 	tree arg;
@@ -13927,7 +13927,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
 	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
 	  target = gen_reg_rtx (tmode);
 
-	/*pat = gen_altivec_lvsr (target, op);*/
 	pat = GEN_FCN (icode) (target, op);
 	if (!pat)
 	  return 0;
Index: gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c	(working copy)
@@ -0,0 +1,19 @@
+/* Test expected code generation for lvsl and lvsr on little endian.  */
+
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-options "-O0 -Wno-deprecated" } */
+/* { dg-final { scan-assembler-times "lvsl" 1 } } */
+/* { dg-final { scan-assembler-times "lvsr" 1 } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "vperm" 2 } } */
+
+
+#include <altivec.h>
+
+float f[20];
+
+void foo ()
+{
+  vector unsigned char a = vec_lvsl (4, f);
+  vector unsigned char b = vec_lvsr (8, f);
+}


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-10-03 18:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-29 22:26 [PATCH, rs6000] Generate LE code for vec_lvsl and vec_lvsr that is compatible with BE code Bill Schmidt
2014-09-30 14:50 ` Segher Boessenkool
2014-09-30 15:24   ` Bill Schmidt
2014-09-30 16:04     ` Segher Boessenkool
2014-09-30 16:18       ` Bill Schmidt
2014-09-30 20:37         ` Segher Boessenkool
2014-10-02 19:20   ` Bill Schmidt
2014-10-03 18:18     ` David Edelsohn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).