* [PATCH, testsuite]: Committed: Test macroized SSE intrinsics
@ 2007-12-14 13:28 Uros Bizjak
From: Uros Bizjak @ 2007-12-14 13:28 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 723 bytes --]

Hello!

This is a follow-up to the recent SSE intrinsics update. This patch
adds testsuite coverage of the macroized SSE intrinsics to avoid
trivial errors in the SSE header files. Additionally, a couple of
trivial formatting fixes were introduced to the headers.
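
For background: when __OPTIMIZE__ is not defined, these intrinsics are
plain function-like macros rather than inline functions, so a defect
such as an unbalanced parenthesis goes unnoticed until the macro is
actually expanded at a call site. A minimal sketch of the failure mode
(hypothetical macro name, modeled on the _mm_roti_epi64 fix below):

#define _my_roti_epi64(A, N) \
  ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(N))

/* The missing ')' above is accepted by the preprocessor; the syntax
   error only appears once the macro is used:  */
__m128i
use_it (__m128i x)
{
  return _my_roti_epi64 (x, 1);
}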

The patch was tested on i686-pc-linux-gnu and committed to mainline SVN.
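
For reference, sse-13.c exercises the same intrinsics with optimization
enabled, where they are inline functions and an ordinary parameter is
not a valid immediate operand. The test therefore redefines each
immediate-taking builtin as a function-like macro that substitutes a
literal; since function-like macros do not expand recursively, a
redefinition such as the one below simply rewrites every call to pass
the literal 1 in place of M:

#define __builtin_ia32_pcmpistri128(X, Y, M) \
  __builtin_ia32_pcmpistri128(X, Y, 1)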

2007-12-14  Uros Bizjak  <ubizjak@gmail.com>

	* gcc.target/i386/sse-14.c (test_1, test_2, test_2x, test_4): New
	macros to test macroized SSE intrinsics.  Use new macros to test
	macroized SSE intrinsics from ammintrin.h, smmintrin.h, tmmintrin.h,
	emmintrin.h, xmmintrin.h and bmmintrin.h.
	* gcc.target/i386/sse-13.c (__builtin_ia32_pcmp?str*128): Redefine
	to test with immediate operand.
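
Each test_N macro in sse-14.c instantiates a small wrapper function
that invokes the intrinsic with a literal immediate, forcing the
macroized form to expand. For example,

test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)

expands via _CONCAT to:

__m128i __mm_blend_epi16 (__m128i A, __m128i B, int const I)
{ return _mm_blend_epi16 (A, B, 1); }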

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 13105 bytes --]

Index: testsuite/gcc.target/i386/sse-14.c
===================================================================
--- testsuite/gcc.target/i386/sse-14.c	(revision 130927)
+++ testsuite/gcc.target/i386/sse-14.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -march=k8 -m3dnow -msse4.1 -msse5" } */
+/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */
 
 /* Test that the intrinsics compile without optimization.  All of them are
    defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h  and mm3dnow.h
@@ -12,3 +12,98 @@
 #include <bmmintrin.h>
 #include <smmintrin.h>
 #include <mm3dnow.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_1(func, type, op1_type, imm)				\
+  type _CONCAT(_,func) (op1_type A, int const I)			\
+  { return func (A, imm); }
+
+#define test_1x(func, type, op1_type, imm1, imm2)			\
+  type _CONCAT(_,func) (op1_type A, int const I, int const L)		\
+  { return func (A, imm1, imm2); }
+
+#define test_2(func, type, op1_type, op2_type, imm)			\
+  type _CONCAT(_,func) (op1_type A, op2_type B, int const I)		\
+  { return func (A, B, imm); }
+
+#define test_2x(func, type, op1_type, op2_type, imm1, imm2)		\
+  type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
+  { return func (A, B, imm1, imm2); }
+
+#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm)	\
+  type _CONCAT(_,func) (op1_type A, op2_type B,				\
+			op3_type C, op4_type D, int const I)		\
+  { return func (A, B, C, D, imm); }
+
+
+/* Following intrinsics require immediate arguments and
+   are defined as macros for non-optimized compilations. */
+
+/* ammintrin.h */
+test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
+test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
+
+/* smmintrin.h */
+test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
+test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
+test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_dp_ps, __m128, __m128, __m128, 1)
+test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_insert_ps, __m128, __m128, __m128, 1)
+test_1 (_mm_extract_ps, int, __m128, 1)
+test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1)
+test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1)
+#ifdef __x86_64__
+test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1)
+#endif
+test_1 (_mm_extract_epi8, int, __m128i, 1)
+test_1 (_mm_extract_epi32, int, __m128i, 1)
+#ifdef __x86_64__
+test_1 (_mm_extract_epi64, long long, __m128i, 1)
+#endif
+test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1)
+test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1)
+test_2 (_mm_cmpistri, int, __m128i, __m128i, 1)
+test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1)
+test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1)
+test_2 (_mm_cmpistra, int, __m128i, __m128i, 1)
+test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1)
+test_2 (_mm_cmpistro, int, __m128i, __m128i, 1)
+test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1)
+test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1)
+test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1)
+test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1)
+test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1)
+test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1)
+test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
+
+/* tmmintrin.h */
+test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1)
+test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1)
+
+/* emmintrin.h */
+test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1)
+test_1 (_mm_srli_si128, __m128i, __m128i, 1)
+test_1 (_mm_slli_si128, __m128i, __m128i, 1)
+test_1 (_mm_extract_epi16, int, __m128i, 1)
+test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1)
+test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1)
+test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1)
+test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1)
+
+/* xmmintrin.h */
+test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1)
+test_1 (_mm_extract_pi16, int, __m64, 1)
+test_1 (_m_pextrw, int, __m64, 1)
+test_2 (_mm_insert_pi16, __m64, __m64, int, 1)
+test_2 (_m_pinsrw, __m64, __m64, int, 1)
+test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
+test_1 (_m_pshufw, __m64, __m64, 1)
+test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
+
+/* bmmintrin.h */
+test_1 (_mm_roti_epi8, __m128i, __m128i, 1)
+test_1 (_mm_roti_epi16, __m128i, __m128i, 1)
+test_1 (_mm_roti_epi32, __m128i, __m128i, 1)
+test_1 (_mm_roti_epi64, __m128i, __m128i, 1)
Index: testsuite/gcc.target/i386/sse-13.c
===================================================================
--- testsuite/gcc.target/i386/sse-13.c	(revision 130927)
+++ testsuite/gcc.target/i386/sse-13.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=k8 -m3dnow -msse4.1 -msse5 " } */
+/* { dg-options "-O2 -march=k8 -m3dnow -msse4.1 -msse5" } */
 
 /* Test that the intrinsics compile with optimization.  All of them are
    defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
@@ -34,6 +34,34 @@
 #define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
 #define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
 #define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1)
+#define __builtin_ia32_pcmpistrm128(X, Y, M) \
+  __builtin_ia32_pcmpistrm128(X, Y, 1)
+#define __builtin_ia32_pcmpistri128(X, Y, M) \
+  __builtin_ia32_pcmpistri128(X, Y, 1)
+#define __builtin_ia32_pcmpestrm128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestri128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpistria128(X, Y, M) \
+  __builtin_ia32_pcmpistria128(X, Y, 1)
+#define __builtin_ia32_pcmpistric128(X, Y, M) \
+  __builtin_ia32_pcmpistric128(X, Y, 1)
+#define __builtin_ia32_pcmpistrio128(X, Y, M) \
+  __builtin_ia32_pcmpistrio128(X, Y, 1)
+#define __builtin_ia32_pcmpistris128(X, Y, M) \
+  __builtin_ia32_pcmpistris128(X, Y, 1)
+#define __builtin_ia32_pcmpistriz128(X, Y, M) \
+  __builtin_ia32_pcmpistriz128(X, Y, 1)
+#define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestria128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestric128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestris128(X, LX, Y, LY, 1)
+#define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \
+  __builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1)
 
 /* tmmintrin.h */
 #define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8)
Index: config/i386/bmmintrin.h
===================================================================
--- config/i386/bmmintrin.h	(revision 130927)
+++ config/i386/bmmintrin.h	(working copy)
@@ -375,14 +375,14 @@
   return  (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
 }
 #else
-#define _mm_roti_epi8(A, B) \
-  ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(B)))
-#define _mm_roti_epi16(A, B) \
-  ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(B)))
-#define _mm_roti_epi32(A, B) \
-  ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(B)))
-#define _mm_roti_epi64(A, B) \
-  ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(B))
+#define _mm_roti_epi8(A, N) \
+  ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+  ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+  ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+  ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(N)))
 #endif
 
 /* pshl */
Index: config/i386/smmintrin.h
===================================================================
--- config/i386/smmintrin.h	(revision 130927)
+++ config/i386/smmintrin.h	(working copy)
@@ -341,9 +341,9 @@
 #endif
 #else
 #define _mm_extract_epi8(X, N) \
-  __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))
+  ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
 #define _mm_extract_epi32(X, N) \
-  __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))
+  ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
 
 #ifdef __x86_64__
 #define _mm_extract_epi64(X, N) \
@@ -544,7 +544,7 @@
 #define _mm_cmpestri(X, LX, Y, LY, M)					\
   ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX),	\
 				      (__v16qi)(__m128i)(Y), (int)(LY),	\
-				      (int)(M))
+				      (int)(M)))
 #endif
 
 /* Intrinsics for text/string processing and reading values of
Index: config/i386/tmmintrin.h
===================================================================
--- config/i386/tmmintrin.h	(revision 130927)
+++ config/i386/tmmintrin.h	(working copy)
@@ -201,8 +201,8 @@
 					(__v2di)(__m128i)(Y),		\
 					(int)(N) * 8))
 #define _mm_alignr_pi8(X, Y, N)						\
-  ((__m64) __builtin_ia32_palignr ((long long)(__m64)(__X),		\
-				   (long long)(__m64)(__Y),		\
+  ((__m64) __builtin_ia32_palignr ((long long)(__m64)(X),		\
+				   (long long)(__m64)(Y),		\
 				   (int)(N) * 8))
 #endif
 
Index: config/i386/xmmintrin.h
===================================================================
--- config/i386/xmmintrin.h	(revision 130927)
+++ config/i386/xmmintrin.h	(working copy)
@@ -1007,8 +1007,8 @@
 #else
 #define _mm_extract_pi16(A, N)	\
   ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
-#define _m_pextrw(A, N) \
-  ((int) _mm_extract_pi16((__m64)(A),(int)(N)))
+
+#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
 #endif
 
 /* Inserts word D into one of four words of A.  The selector N must be
@@ -1029,8 +1029,8 @@
 #define _mm_insert_pi16(A, D, N)				\
   ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A),	\
 					(int)(D), (int)(N)))
-#define _m_pinsrw(A, D, N) \
-  ((__m64) _mm_insert_pi16((__m64)(A), (int)(D), (int)(N))
+
+#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
 #endif
 
 /* Compute the element-wise maximum of signed 16-bit values.  */
@@ -1129,8 +1129,8 @@
 #else
 #define _mm_shuffle_pi16(A, N) \
   ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
-#define _m_pshufw(A, N) \
-  ((__m64) _mm_shuffle_pi16 ((__m64)(A), (int)(N))
+
+#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
 #endif
 
 /* Conditionally store byte elements of A into P.  The high bit of each
Index: config/i386/emmintrin.h
===================================================================
--- config/i386/emmintrin.h	(revision 130927)
+++ config/i386/emmintrin.h	(working copy)
@@ -887,9 +887,9 @@
   return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
 }
 #else
-#define _mm_shuffle_pd(__A, __B, __C)					\
-  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)__A,		\
-				   (__v2df)(__m128d)__B, (int)(__C)))
+#define _mm_shuffle_pd(A, B, N)						\
+  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
+				   (__v2df)(__m128d)(B), (int)(N)))
 #endif
 
 static __inline __m128d __attribute__((__always_inline__, __artificial__))
@@ -1146,21 +1146,21 @@
 
 #ifdef __OPTIMIZE__
 static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_srli_si128 (__m128i __A, const int __B)
+_mm_srli_si128 (__m128i __A, const int __N)
 {
-  return (__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8);
+  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
 }
 
 static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_slli_si128 (__m128i __A, const int __B)
+_mm_slli_si128 (__m128i __A, const int __N)
 {
-  return (__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8);
+  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
 }
 #else
-#define _mm_srli_si128(__A, __B) \
-  ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8))
-#define _mm_slli_si128(__A, __B) \
-  ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8))
+#define _mm_srli_si128(A, N) \
+  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
+#define _mm_slli_si128(A, N) \
+  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
 #endif
 
 static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -1382,12 +1382,12 @@
   return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
 }
 #else
-#define _mm_shufflehi_epi16(__A, __B) \
-  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)__A, (int)__B))
-#define _mm_shufflelo_epi16(__A, __B) \
-  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)__A, (int)__B))
-#define _mm_shuffle_epi32(__A, __B) \
-  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)__A, (int)__B))
+#define _mm_shufflehi_epi16(A, N) \
+  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shufflelo_epi16(A, N) \
+  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shuffle_epi32(A, N) \
+  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
 #endif
 
 static __inline void __attribute__((__always_inline__, __artificial__))
