[PATCH, rs6000] Generate LE code for vec_lvsl and vec_lvsr that is compatible with BE code

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: dje.gcc@gmail.com
Subject: [PATCH, rs6000] Generate LE code for vec_lvsl and vec_lvsr that is compatible with BE code
Date: Mon, 29 Sep 2014 22:26:00 -0000	[thread overview]
Message-ID: <1412029574.2986.42.camel@gnopaine> (raw)

Hi,

Up till now we have not attempted to generate code for LE usage of
vec_lvsl and vec_lvsr that is compatible with expected BE usage.  The LE
code sequence corresponding to lvsl/vperm is not good, and we encourage
programmers to convert those sequences to use direct assignment and the
type system for unaligned loads.  However, the issue comes up frequently
enough that it seems best to provide this sequence, together with a
warning message (added in a previous patch submission), to avoid confusion.

The method used in this patch is to perform a byte-reversal of the
result of the lvsl/lvsr.  This is accomplished by loading the vector
char constant {0,1,...,15}, which on LE will appear in the register from
left to right as {15,...,1,0}.  A vperm instruction (which uses BE element
ordering) is applied to the result of the lvsl/lvsr using the loaded
constant as the permute control vector.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


[gcc]

2014-09-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* altivec.md (altivec_lvsl): New define_expand.
	(altivec_lvsl_direct): Rename define_insn from altivec_lvsl.
	(altivec_lvsr): New define_expand.
	(altivec_lvsr_direct): Rename define_insn from altivec_lvsr.
	* rs6000.c (rs6000_expand_builtin): Change to use
	altivec_lvs[lr]_direct; remove commented-out code.

[gcc/testsuite]

2014-09-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/lvsl-lvsr.c: New test.


Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 215689)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -2297,7 +2297,32 @@
   "dststt %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "altivec_lvsl"
+(define_expand "altivec_lvsl"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+  "TARGET_ALTIVEC"
+  "
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1]));
+  else
+    {
+      int i;
+      rtx mask, perm[16], constv, vperm;
+      mask = gen_reg_rtx (V16QImode);
+      emit_insn (gen_altivec_lvsl_direct (mask, operands[1]));
+      for (i = 0; i < 16; ++i)
+        perm[i] = GEN_INT (i);
+      constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+      constv = force_reg (V16QImode, constv);
+      vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+                              UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+    }
+  DONE;
+}")
+
+(define_insn "altivec_lvsl_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
 		      UNSPEC_LVSL))]
@@ -2305,7 +2330,32 @@
   "lvsl %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_lvsr"
+(define_expand "altivec_lvsr"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+  "TARGET_ALTIVEC"
+  "
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1]));
+  else
+    {
+      int i;
+      rtx mask, perm[16], constv, vperm;
+      mask = gen_reg_rtx (V16QImode);
+      emit_insn (gen_altivec_lvsr_direct (mask, operands[1]));
+      for (i = 0; i < 16; ++i)
+        perm[i] = GEN_INT (i);
+      constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+      constv = force_reg (V16QImode, constv);
+      vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+                              UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+    }
+  DONE;
+}")
+
+(define_insn "altivec_lvsr_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
 		      UNSPEC_LVSR))]
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 215689)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -13898,8 +13898,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
     case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
     case ALTIVEC_BUILTIN_MASK_FOR_STORE:
       {
-	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
-		     : (int) CODE_FOR_altivec_lvsl);
+	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+		     : (int) CODE_FOR_altivec_lvsl_direct);
 	enum machine_mode tmode = insn_data[icode].operand[0].mode;
 	enum machine_mode mode = insn_data[icode].operand[1].mode;
 	tree arg;
@@ -13927,7 +13927,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
 	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
 	  target = gen_reg_rtx (tmode);
 
-	/*pat = gen_altivec_lvsr (target, op);*/
 	pat = GEN_FCN (icode) (target, op);
 	if (!pat)
 	  return 0;
Index: gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c	(working copy)
@@ -0,0 +1,19 @@
+/* Test expected code generation for lvsl and lvsr on little endian.  */
+
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-options "-O0 -Wno-deprecated" } */
+/* { dg-final { scan-assembler-times "lvsl" 1 } } */
+/* { dg-final { scan-assembler-times "lvsr" 1 } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "vperm" 2 } } */
+
+
+#include <altivec.h>
+
+float f[20];
+
+void foo ()
+{
+  vector unsigned char a = vec_lvsl (4, f);
+  vector unsigned char b = vec_lvsr (8, f);
+}

next             reply	other threads:[~2014-09-29 22:26 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-29 22:26 Bill Schmidt [this message]
2014-09-30 14:50 ` Segher Boessenkool
2014-09-30 15:24   ` Bill Schmidt
2014-09-30 16:04     ` Segher Boessenkool
2014-09-30 16:18       ` Bill Schmidt
2014-09-30 20:37         ` Segher Boessenkool
2014-10-02 19:20   ` Bill Schmidt
2014-10-03 18:18     ` David Edelsohn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1412029574.2986.42.camel@gnopaine \
    --to=wschmidt@linux.vnet.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).