public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Use hardware support for vector character multiply
@ 2015-09-03 15:22 Bill Schmidt
  2015-09-03 15:33 ` Andrew Pinski
  2015-09-03 15:36 ` David Edelsohn
  0 siblings, 2 replies; 8+ messages in thread
From: Bill Schmidt @ 2015-09-03 15:22 UTC (permalink / raw)
  To: gcc-patches; +Cc: dje.gcc

Hi,

It was pointed out to me recently that multiplying two vector chars is
performed using scalarization, even though we have hardware support for
byte multiplies in vectors.  This patch adds an expansion for mulv16qi3
to correct this.

The expansion is pretty simple.  We do a multiply-even and multiply-odd
to create halfword results, and then use a permute to extract the
low-order bytes of each result.  This particular form of a permute uses
a different set of input/output vector modes than have been used before,
so I added the altivec_vperm_v8hiv16qi insn to represent this.  (The two
source operands are vector halfword types, while the target operand is a
vector char type.)

I've added two test variants, one to test the code generation, and one
executable test to check correctness.  One other test failed with this
change.  This turned out to be because PowerPC was excluded from the
check_effective_target_vect_char_mult target support test.  I resolved
this by adding check_effective_target_powerpc_altivec to that test.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


[gcc]

2015-09-03  Bill Schmidt  <wschmidt@vnet.linux.ibm.com>

	* config/rs6000/altivec.md (altivec_vperm_v8hiv16qi): New
	define_insn.
	(mulv16qi3): New define_expand.

[gcc/testsuite]

2015-09-03  Bill Schmidt  <wschmidt@vnet.linux.ibm.com>

	* gcc.target/powerpc/vec-mult-char-1.c: New test.
	* gcc.target/powerpc/vec-mult-char-2.c: New test.


Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 227416)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -1957,6 +1957,16 @@
   "vperm %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])
 
+(define_insn "altivec_vperm_v8hiv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+	(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+   	               (match_operand:V8HI 2 "register_operand" "v")
+		       (match_operand:V16QI 3 "register_operand" "v")]
+		   UNSPEC_VPERM))]
+  "TARGET_ALTIVEC"
+  "vperm %0,%1,%2,%3"
+  [(set_attr "type" "vecperm")])
+
 (define_expand "altivec_vperm_<mode>_uns"
   [(set (match_operand:VM 0 "register_operand" "=v")
 	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
@@ -3161,6 +3171,34 @@
   "<VI_unit>"
   "")
 
+(define_expand "mulv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (mult:V16QI (match_operand:V16QI 1 "register_operand" "v")
+                    (match_operand:V16QI 2 "register_operand" "v")))]
+  "TARGET_ALTIVEC"
+  "
+{
+  rtx even = gen_reg_rtx (V8HImode);
+  rtx odd = gen_reg_rtx (V8HImode);
+  rtx mask = gen_reg_rtx (V16QImode);
+  rtvec v = rtvec_alloc (16);
+  bool be = BYTES_BIG_ENDIAN;
+  int i;
+
+  for (i = 0; i < 8; ++i) {
+    RTVEC_ELT (v, 2 * i)
+     = gen_rtx_CONST_INT (QImode, be ? 2 * i + 1 : 31 - 2 * i);
+    RTVEC_ELT (v, 2 * i + 1)
+     = gen_rtx_CONST_INT (QImode, be ? 2 * i + 17 : 15 - 2 * i);
+  }
+
+  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2]));
+  emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2]));
+  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask));
+  DONE;
+}")
+
 (define_expand "altivec_negv4sf2"
   [(use (match_operand:V4SF 0 "register_operand" ""))
    (use (match_operand:V4SF 1 "register_operand" ""))]
Index: gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c	(working copy)
@@ -0,0 +1,53 @@
+/* { dg-do run { target { powerpc*-*-* && vmx_hw } } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-maltivec" } */
+
+#include <altivec.h>
+
+extern void abort (void);
+
+vector unsigned char vmului(vector unsigned char v,
+			    vector unsigned char i)
+{
+	return v * i;
+}
+
+vector signed char vmulsi(vector signed char v,
+			  vector signed char i)
+{
+	return v * i;
+}
+
+int main ()
+{
+  vector unsigned char a = {2, 4, 6, 8, 10, 12, 14, 16,
+			    18, 20, 22, 24, 26, 28, 30, 32};
+  vector unsigned char b = {3, 6, 9, 12, 15, 18, 21, 24,
+			    27, 30, 33, 36, 39, 42, 45, 48};
+  vector unsigned char c = vmului (a, b);
+  vector unsigned char expect_c = {6, 24, 54, 96, 150, 216, 38, 128,
+				   230, 88, 214, 96, 246, 152, 70, 0};
+
+  vector signed char d = {2, -4, 6, -8, 10, -12, 14, -16,
+			  18, -20, 22, -24, 26, -28, 30, -32};
+  vector signed char e = {3, 6, -9, -12, 15, 18, -21, -24,
+			  27, 30, -33, -36, 39, 42, -45, -48};
+  vector signed char f = vmulsi (d, e);
+  vector signed char expect_f = {6, -24, -54, 96, -106, 40, -38, -128,
+				 -26, -88, 42, 96, -10, 104, -70, 0};
+
+  vector signed char g = {127, -128, 126, -126, 125, -125,  124, -124,
+			  123, -123, 122, -122, 121, -121,  120, -120};
+  vector signed char h = {  2,    2,  -2,   -2, 127,  127, -128, -128,
+			    10,  10, -10,  -10,  64,   65,  -64,  -65};
+  vector signed char i = vmulsi (g, h);
+  vector signed char expect_i = {-2, 0, 4, -4, 3, -3, 0, 0,
+				 -50, 50, 60, -60, 64, 71, 0, 120};
+
+  if (!vec_all_eq (c, expect_c))
+    abort ();
+  if (!vec_all_eq (f, expect_f))
+    abort ();
+  if (!vec_all_eq (i, expect_i))
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c	(working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { powerpc*-*-* && vmx_hw } } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-maltivec" } */
+
+#include <altivec.h>
+
+vector unsigned char vmului(vector unsigned char v,
+			    vector unsigned char i)
+{
+	return v * i;
+}
+
+vector signed char vmulsi(vector signed char v,
+			  vector signed char i)
+{
+	return v * i;
+}
+
+/* { dg-final { scan-assembler-times "vmulesb" 2 } } */
+/* { dg-final { scan-assembler-times "vmulosb" 2 } } */
+/* { dg-final { scan-assembler-times "vperm" 2 } } */
Index: gcc/testsuite/lib/target-supports.exp
===================================================================
--- gcc/testsuite/lib/target-supports.exp	(revision 227416)
+++ gcc/testsuite/lib/target-supports.exp	(working copy)
@@ -4577,7 +4577,8 @@ proc check_effective_target_vect_char_mult { } {
 	if { [istarget aarch64*-*-*]
 	     || [istarget ia64-*-*]
 	     || [istarget i?86-*-*] || [istarget x86_64-*-*]
-            || [check_effective_target_arm32] } {
+             || [check_effective_target_arm32]
+	     || [check_effective_target_powerpc_altivec] } {
 	   set et_vect_char_mult_saved 1
 	}
     }


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-09-15 20:37 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-03 15:22 [PATCH, rs6000] Use hardware support for vector character multiply Bill Schmidt
2015-09-03 15:33 ` Andrew Pinski
2015-09-03 15:36   ` Bill Schmidt
2015-09-15 14:08     ` Richard Biener
2015-09-15 20:37       ` Andrew Pinski
2015-09-15 20:50         ` Bill Schmidt
2015-09-03 15:36 ` David Edelsohn
2015-09-03 15:53   ` Bill Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).