public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Add AVX512 unaligned intrinsics
@ 2019-07-09 22:03 Sunil Pandey
  2019-07-10  9:03 ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Sunil Pandey @ 2019-07-09 22:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: hjl.tools, ubizjak

[-- Attachment #1: Type: text/plain, Size: 1130 bytes --]

__m512i _mm512_loadu_epi32(void const * sa);
__m512i _mm512_loadu_epi64(void const * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);

Tested on x86-64.

OK for trunk?

--Sunil Pandey


gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (__v16si_u): New data type.
        (__v8di_u): Likewise.
        (_mm512_loadu_epi32): New.
        (_mm512_loadu_epi64): Likewise.
        (_mm512_storeu_epi32): Likewise.
        (_mm512_storeu_epi64): Likewise.
        * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi64): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
        * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
        * gcc.target/i386/pr90980-1.c: Likewise.
        * gcc.target/i386/pr90980-2.c: Likewise.

[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --]
[-- Type: application/octet-stream, Size: 7793 bytes --]

From 6b81933857d41501efde56bef16c40fea3201899 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp1@gmail.com>
Date: Mon, 8 Jul 2019 12:36:02 -0700
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics

__m512i _mm512_loadu_epi32(void const * sa);
__m512i _mm512_loadu_epi64(void const * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);

Tested on x86-64.

gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (__v16si_u): New data type.
        (__v8di_u): Likewise.
        (_mm512_loadu_epi32): New.
        (_mm512_loadu_epi64): Likewise.
        (_mm512_storeu_epi32): Likewise.
        (_mm512_storeu_epi64): Likewise.
        * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi64): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
        * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
        * gcc.target/i386/pr90980-1.c: Likewise.
        * gcc.target/i386/pr90980-2.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h               | 35 +++++++++++++++++++
 gcc/config/i386/avx512vlintrin.h              | 28 +++++++++++++++
 .../gcc.target/i386/avx512f-vmovdqu32-3.c     | 16 +++++++++
 .../gcc.target/i386/avx512f-vmovdqu64-3.c     | 16 +++++++++
 gcc/testsuite/gcc.target/i386/pr90980-1.c     | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr90980-2.c     | 17 +++++++++
 6 files changed, 129 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e35eedb9268..9a4d657c108 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -46,6 +46,12 @@ typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
 
+/* Internal data types for implementing unaligned versions of intrinsics.  */
+typedef int __v16si_u __attribute__ ((__vector_size__ (64),
+				      __aligned__ (1)));
+typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
+					   __aligned__ (1)));
+
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components.  */
 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
@@ -405,6 +411,35 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
 						   (__mmask8) __U);
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi32 (void const *__P)
+{
+  __v16si_u __X = *(__v16si_u *) __P;
+  return (__m512i) __X;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi64 (void const *__P)
+{
+  return *(__v8di_u *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+  *(__v16si_u *) __P = (__v16si_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+  *(__v8di_u *) __P = (__v8di_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_store_pd (void *__P, __m512d __A)
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 3eaf817f898..06bcc50b6b4 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A)
   *(__m256i *) __P = __A;
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi32 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi32 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi64 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi64 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_epi64 (void *__P, __m128i __A)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
new file mode 100644
index 00000000000..8f07b6e986a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+int *a,*b;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi32 (a);
+  _mm512_storeu_epi32 (b, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
new file mode 100644
index 00000000000..c2805116eae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+long long *a;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi64 (a);
+  _mm512_storeu_epi64 (a, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c
new file mode 100644
index 00000000000..72a30dc8da2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m128i xx;
+volatile __m128i xx1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm_storeu_epi32 (a, xx);
+  _mm_storeu_epi64 (b, xx1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c
new file mode 100644
index 00000000000..b1980e65140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m256i yy;
+volatile __m256i yy1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm256_storeu_epi32 (a, yy);
+  _mm256_storeu_epi64 (b, yy1);
+}
-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-11 18:43 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-09 22:03 [PATCH] i386: Add AVX512 unaligned intrinsics Sunil Pandey
2019-07-10  9:03 ` Uros Bizjak
2019-07-10 19:20   ` Sunil Pandey
2019-07-10 19:30     ` Uros Bizjak
2019-07-11 17:04       ` Sunil Pandey
2019-07-11 18:47         ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).