[PATCH] Add explicit VIS intrinsics for addition and subtraction.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: David Miller <davem@davemloft.net>
To: gcc-patches@gcc.gnu.org
Cc: ebotcazou@adacore.com
Subject: [PATCH] Add explicit VIS intrinsics for addition and subtraction.
Date: Tue, 27 Sep 2011 07:19:00 -0000	[thread overview]
Message-ID: <20110927.000118.1421173401433924103.davem@davemloft.net> (raw)


Eric, I'm sure you have noticed this, but the Sparc target test
"combined-1.c" has been failing for some time on 32-bit because of
how float arguments are passed in the 32-bit SPARC ABI.

Since they are passed in integer registers, the vectorizer does
the initial logical operations using non-VIS instructions,
then moves the results into the FPU regs to use the VIS vector
addition and subtraction.

Because of this, some of the scan-assembler directives in that test
fail to match.

Would you mind if I added a hack, like we use in the other test
cases and the one I added here, to force the VIS operations to
be used on 32-bit?  We could either pass the vectors as pointer
args, or use the trick where we return the vector values from
extern functions.  Any preference?

Committed to trunk.

gcc/

	* config/sparc/sparc.c (sparc_vis_init_builtins): Add explicit
	builtins for VIS vector addition and subtraction.
	* config/sparc/visintrin.h (__vis_fpadd16, __vis_fpadd16s,
	__vis_fpadd32, __vis_fpadd32s, __vis_fpsub16, __vis_fpsub16s,
	__vis_fpsub32, __vis_fpsub32s): New.
	* doc/extend.texi: Document new VIS intrinsics.

gcc/testsuite/

	* gcc.target/sparc/fpaddsubi.c: New test.
---
 gcc/ChangeLog                              |    7 +++
 gcc/config/sparc/sparc.c                   |   21 ++++++++++
 gcc/config/sparc/visintrin.h               |   57 +++++++++++++++++++++++++++
 gcc/doc/extend.texi                        |   10 +++++
 gcc/testsuite/ChangeLog                    |    2 +
 gcc/testsuite/gcc.target/sparc/fpaddsubi.c |   58 ++++++++++++++++++++++++++++
 6 files changed, 155 insertions(+), 0 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpaddsubi.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ecdb26b..a3dd883 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -24,6 +24,13 @@
 	* config/sparc/sparc-protos.h (sparc_target_macros): Declare.
 	* config/sparc/sparc.h (TARGE_CPU_CPP_BUILTINS): Call it.
 
+	* config/sparc/sparc.c (sparc_vis_init_builtins): Add explicit
+	builtins for VIS vector addition and subtraction.
+	* config/sparc/visintrin.h (__vis_fpadd16, __vis_fpadd16s,
+	__vis_fpadd32, __vis_fpadd32s, __vis_fpsub16, __vis_fpsub16s,
+	__vis_fpsub32, __vis_fpsub32s): New.
+	* doc/extend.texi: Document new VIS intrinsics.
+
 2011-09-26  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr.md (peephole casesi+2): Use -1 instead of 65536.
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index d1d8355..44d7f20 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -9173,6 +9173,7 @@ sparc_vis_init_builtins (void)
   tree v4hi = build_vector_type (intHI_type_node, 4);
   tree v2hi = build_vector_type (intHI_type_node, 2);
   tree v2si = build_vector_type (intSI_type_node, 2);
+  tree v1si = build_vector_type (intSI_type_node, 1);
 
   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
@@ -9186,6 +9187,8 @@ sparc_vis_init_builtins (void)
   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
+  tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
+  tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
 							 v8qi, v8qi,
 							 intDI_type_node, 0);
@@ -9350,6 +9353,24 @@ sparc_vis_init_builtins (void)
       def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
 			 si_ftype_v2si_v2si);
     }
+
+  /* Addition and subtraction.  */
+  def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
+		     v4hi_ftype_v4hi_v4hi);
+  def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
+		     v2hi_ftype_v2hi_v2hi);
+  def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
+		     v2si_ftype_v2si_v2si);
+  def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addsi3,
+		     v1si_ftype_v1si_v1si);
+  def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
+		     v4hi_ftype_v4hi_v4hi);
+  def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
+		     v2hi_ftype_v2hi_v2hi);
+  def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
+		     v2si_ftype_v2si_v2si);
+  def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subsi3,
+		     v1si_ftype_v1si_v1si);
 }
 
 /* Handle TARGET_EXPAND_BUILTIN target hook.
diff --git a/gcc/config/sparc/visintrin.h b/gcc/config/sparc/visintrin.h
index d37bd95..eb2b4ec 100644
--- a/gcc/config/sparc/visintrin.h
+++ b/gcc/config/sparc/visintrin.h
@@ -25,6 +25,7 @@
 #define _VISINTRIN_H_INCLUDED
 
 typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef int __v1si __attribute__ ((__vector_size__ (4)));
 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
 typedef short __v2hi __attribute__ ((__vector_size__ (4)));
 typedef unsigned char __v8qi __attribute__ ((__vector_size__ (8)));
@@ -276,4 +277,60 @@ __vis_fcmpeq32 (__v2si __A, __v2si __B)
   return __builtin_vis_fcmpeq32 (__A, __B);
 }
 
+extern __inline __v4hi
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpadd16 (__v4hi __A, __v4hi __B)
+{
+  return __builtin_vis_fpadd16 (__A, __B);
+}
+
+extern __inline __v2hi
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpadd16s (__v2hi __A, __v2hi __B)
+{
+  return __builtin_vis_fpadd16s (__A, __B);
+}
+
+extern __inline __v2si
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpadd32 (__v2si __A, __v2si __B)
+{
+  return __builtin_vis_fpadd32 (__A, __B);
+}
+
+extern __inline __v1si
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpadd32s (__v1si __A, __v1si __B)
+{
+  return __builtin_vis_fpadd32s (__A, __B);
+}
+
+extern __inline __v4hi
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpsub16 (__v4hi __A, __v4hi __B)
+{
+  return __builtin_vis_fpsub16 (__A, __B);
+}
+
+extern __inline __v2hi
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpsub16s (__v2hi __A, __v2hi __B)
+{
+  return __builtin_vis_fpsub16s (__A, __B);
+}
+
+extern __inline __v2si
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpsub32 (__v2si __A, __v2si __B)
+{
+  return __builtin_vis_fpsub32 (__A, __B);
+}
+
+extern __inline __v1si
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_fpsub32s (__v1si __A, __v1si __B)
+{
+  return __builtin_vis_fpsub32s (__A, __B);
+}
+
 #endif  /* _VISINTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e9d0bc7..195fa8c 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12929,6 +12929,7 @@ the SPARC Visual Instruction Set (VIS).  When you use the @option{-mvis}
 switch, the VIS extension is exposed as the following built-in functions:
 
 @smallexample
+typedef int v1si __attribute__ ((vector_size (4)));
 typedef int v2si __attribute__ ((vector_size (8)));
 typedef short v4hi __attribute__ ((vector_size (8)));
 typedef short v2hi __attribute__ ((vector_size (4)));
@@ -12977,6 +12978,15 @@ long __builtin_vis_fcmpgt16 (v4hi, v4hi);
 long __builtin_vis_fcmpgt32 (v2si, v2si);
 long __builtin_vis_fcmpeq16 (v4hi, v4hi);
 long __builtin_vis_fcmpeq32 (v2si, v2si);
+
+v4hi __builtin_vis_fpadd16 (v4hi, v4hi);
+v2hi __builtin_vis_fpadd16s (v2hi, v2hi);
+v2si __builtin_vis_fpadd32 (v2si, v2si);
+v1si __builtin_vis_fpadd32s (v1si, v1si);
+v4hi __builtin_vis_fpsub16 (v4hi, v4hi);
+v2hi __builtin_vis_fpsub16s (v2hi, v2hi);
+v2si __builtin_vis_fpsub32 (v2si, v2si);
+v1si __builtin_vis_fpsub32s (v1si, v1si);
 @end smallexample
 
 @node SPU Built-in Functions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ebc9385..b430baf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -14,6 +14,8 @@
 	* gcc.target/sparc/edge.c: Update for new return types.
 	* gcc.target/sparc/fcmp.c: Likewise.
 
+	* gcc.target/sparc/fpaddsubi.c: New test.
+
 2011-09-26  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/50515
diff --git a/gcc/testsuite/gcc.target/sparc/fpaddsubi.c b/gcc/testsuite/gcc.target/sparc/fpaddsubi.c
new file mode 100644
index 0000000..a36108e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/fpaddsubi.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mcpu=ultrasparc -mvis" } */
+typedef int __v2si __attribute__((vector_size(8)));
+typedef int __v1si __attribute__((vector_size(4)));
+typedef short __v4hi __attribute__((vector_size(8)));
+typedef short __v2hi __attribute__((vector_size(4)));
+
+extern __v1si foo_x (void);
+extern __v1si foo_y (void);
+
+__v4hi test_fpadd16 (__v4hi x, __v4hi y)
+{
+  return __builtin_vis_fpadd16 (x, y);
+}
+
+__v2hi test_fpadd16s (__v2hi x, __v2hi y)
+{
+  return __builtin_vis_fpadd16s (x, y);
+}
+
+__v4hi test_fpsub16 (__v4hi x, __v4hi y)
+{
+  return __builtin_vis_fpsub16 (x, y);
+}
+
+__v2hi test_fpsub16s (__v2hi x, __v2hi y)
+{
+  return __builtin_vis_fpsub16s (x, y);
+}
+
+__v2si test_fpadd32 (__v2si x, __v2si y)
+{
+  return __builtin_vis_fpadd32 (x, y);
+}
+
+__v1si test_fpadd32s (void)
+{
+  return __builtin_vis_fpadd32s (foo_x (), foo_y ());
+}
+
+__v2si test_fpsub32 (__v2si x, __v2si y)
+{
+  return __builtin_vis_fpsub32 (x, y);
+}
+
+__v1si test_fpsub32s (__v1si x, __v1si y)
+{
+  return __builtin_vis_fpsub32s (foo_x (), foo_y ());
+}
+
+/* { dg-final { scan-assembler "fpadd16\t%" }  } */
+/* { dg-final { scan-assembler "fpadd16s\t%" }  } */
+/* { dg-final { scan-assembler "fpsub16\t%" }  } */
+/* { dg-final { scan-assembler "fpsub16s\t%" }  } */
+/* { dg-final { scan-assembler "fpadd32\t%" }  } */
+/* { dg-final { scan-assembler "fpadd32s\t%" }  } */
+/* { dg-final { scan-assembler "fpsub32\t%" }  } */
+/* { dg-final { scan-assembler "fpsub32s\t%" }  } */
-- 
1.7.6.401.g6a319

next             reply	other threads:[~2011-09-27  4:02 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-27  7:19 David Miller [this message]
2011-09-27  9:35 ` Eric Botcazou
2011-09-27  9:45   ` David Miller
2011-09-29  1:15     ` Eric Botcazou
2011-10-13 21:05       ` David Miller
2011-10-14 15:15       ` Vladimir Makarov
2011-10-15 17:02         ` Eric Botcazou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110927.000118.1421173401433924103.davem@davemloft.net \
    --to=davem@davemloft.net \
    --cc=ebotcazou@adacore.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).