From: Sunil Pandey <skpgkp1@gmail.com>
To: gcc-patches@gcc.gnu.org
Cc: hjl.tools@gmail.com, ubizjak@gmail.com
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics
Date: Tue, 09 Jul 2019 22:03:00 -0000 [thread overview]
Message-ID: <CAFMdu1KJeMncx_JM0gDvSqP48k3iMbzqzKcCbQ1kdDE6xnesRA@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1130 bytes --]
__m512i _mm512_loadu_epi32(void const * sa);
__m512i _mm512_loadu_epi64(void const * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);
Tested on x86-64.
OK for trunk?
--Sunil Pandey
gcc/
PR target/90980
* config/i386/avx512fintrin.h (__v16si_u): New data type.
(__v8di_u): Likewise.
(_mm512_loadu_epi32): New.
(_mm512_loadu_epi64): Likewise.
(_mm512_storeu_epi32): Likewise.
(_mm512_storeu_epi64): Likewise.
* config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
(_mm256_storeu_epi32): Likewise.
(_mm_storeu_epi64): Likewise.
(_mm256_storeu_epi64): Likewise.
gcc/testsuite/
PR target/90980
* gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
* gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
* gcc.target/i386/pr90980-1.c: Likewise.
* gcc.target/i386/pr90980-2.c: Likewise.
[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --]
[-- Type: application/octet-stream, Size: 7793 bytes --]
From 6b81933857d41501efde56bef16c40fea3201899 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp1@gmail.com>
Date: Mon, 8 Jul 2019 12:36:02 -0700
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics
__m512i _mm512_loadu_epi32(void const * sa);
__m512i _mm512_loadu_epi64(void const * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);
Tested on x86-64.
gcc/
PR target/90980
* config/i386/avx512fintrin.h (__v16si_u): New data type.
(__v8di_u): Likewise.
(_mm512_loadu_epi32): New.
(_mm512_loadu_epi64): Likewise.
(_mm512_storeu_epi32): Likewise.
(_mm512_storeu_epi64): Likewise.
* config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
(_mm256_storeu_epi32): Likewise.
(_mm_storeu_epi64): Likewise.
(_mm256_storeu_epi64): Likewise.
gcc/testsuite/
PR target/90980
* gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
* gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
* gcc.target/i386/pr90980-1.c: Likewise.
* gcc.target/i386/pr90980-2.c: Likewise.
---
gcc/config/i386/avx512fintrin.h | 35 +++++++++++++++++++
gcc/config/i386/avx512vlintrin.h | 28 +++++++++++++++
.../gcc.target/i386/avx512f-vmovdqu32-3.c | 16 +++++++++
.../gcc.target/i386/avx512f-vmovdqu64-3.c | 16 +++++++++
gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 +++++++++
gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 +++++++++
6 files changed, 129 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e35eedb9268..9a4d657c108 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -46,6 +46,12 @@ typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
+/* Internal data types for implementing unaligned version of intrinsics. */
+typedef int __v16si_u __attribute__ ((__vector_size__ (64),
+ __aligned__ (1)));
+typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
+ __aligned__ (1)));
+
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
@@ -405,6 +411,35 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi32 (void const *__P)
+{
+ __v16si_u __X = *(__v16si_u *) __P;
+ return (__m512i) __X;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi64 (void const *__P)
+{
+ return *(__v8di_u *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+ *(__v16si_u *) __P = (__v16si_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+ *(__v8di_u *) __P = (__v8di_u) __A;
+}
+
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 3eaf817f898..06bcc50b6b4 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A)
*(__m256i *) __P = __A;
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi32 (void *__P, __m128i __A)
+{
+ *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi32 (void *__P, __m256i __A)
+{
+ *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi64 (void *__P, __m128i __A)
+{
+ *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi64 (void *__P, __m256i __A)
+{
+ *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_epi64 (void *__P, __m128i __A)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
new file mode 100644
index 00000000000..8f07b6e986a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+int *a,*b;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+ zz = _mm512_loadu_epi32 (a);
+ _mm512_storeu_epi32 (b, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
new file mode 100644
index 00000000000..c2805116eae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+long long *a;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+ zz = _mm512_loadu_epi64 (a);
+ _mm512_storeu_epi64 (a, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c
new file mode 100644
index 00000000000..72a30dc8da2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m128i xx;
+volatile __m128i xx1;
+
+void extern
+avx512vl_test (void)
+{
+ _mm_storeu_epi32 (a, xx);
+ _mm_storeu_epi64 (b, xx1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c
new file mode 100644
index 00000000000..b1980e65140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m256i yy;
+volatile __m256i yy1;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_storeu_epi32 (a, yy);
+ _mm256_storeu_epi64 (b, yy1);
+}
--
2.20.1
next reply other threads:[~2019-07-09 21:44 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-09 22:03 Sunil Pandey [this message]
2019-07-10 9:03 ` Uros Bizjak
2019-07-10 19:20 ` Sunil Pandey
2019-07-10 19:30 ` Uros Bizjak
2019-07-11 17:04 ` Sunil Pandey
2019-07-11 18:47 ` Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAFMdu1KJeMncx_JM0gDvSqP48k3iMbzqzKcCbQ1kdDE6xnesRA@mail.gmail.com \
--to=skpgkp1@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hjl.tools@gmail.com \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).