public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Add AVX512 unaligned intrinsics
@ 2019-07-09 22:03 Sunil Pandey
  2019-07-10  9:03 ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Sunil Pandey @ 2019-07-09 22:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: hjl.tools, ubizjak

[-- Attachment #1: Type: text/plain, Size: 1130 bytes --]

__m512i _mm512_loadu_epi32( void * sa);
__m512i _mm512_loadu_epi64( void * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);

Tested on x86-64.

OK for trunk?

--Sunil Pandey


gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (__v16si_u): New data type.
        (__v8di_u): Likewise.
        (_mm512_loadu_epi32): New.
        (_mm512_loadu_epi64): Likewise.
        (_mm512_storeu_epi32): Likewise.
        (_mm512_storeu_epi64): Likewise.
        * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi64): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
        * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
        * gcc.target/i386/pr90980-1.c: Likewise.
        * gcc.target/i386/pr90980-2.c: Likewise.

[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --]
[-- Type: application/octet-stream, Size: 7793 bytes --]

From 6b81933857d41501efde56bef16c40fea3201899 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp1@gmail.com>
Date: Mon, 8 Jul 2019 12:36:02 -0700
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics

__m512i _mm512_loadu_epi32( void * sa);
__m512i _mm512_loadu_epi64( void * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);

Tested on x86-64.

gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (__v16si_u): New data type.
        (__v8di_u): Likewise.
        (_mm512_loadu_epi32): New.
        (_mm512_loadu_epi64): Likewise.
        (_mm512_storeu_epi32): Likewise.
        (_mm512_storeu_epi64): Likewise.
        * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi64): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
        * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
        * gcc.target/i386/pr90980-1.c: Likewise.
        * gcc.target/i386/pr90980-2.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h               | 35 +++++++++++++++++++
 gcc/config/i386/avx512vlintrin.h              | 28 +++++++++++++++
 .../gcc.target/i386/avx512f-vmovdqu32-3.c     | 16 +++++++++
 .../gcc.target/i386/avx512f-vmovdqu64-3.c     | 16 +++++++++
 gcc/testsuite/gcc.target/i386/pr90980-1.c     | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr90980-2.c     | 17 +++++++++
 6 files changed, 129 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e35eedb9268..9a4d657c108 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -46,6 +46,12 @@ typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
 
+/* Internal data types for implementing unaligned version of intrinsics.  */
+typedef int __v16si_u __attribute__ ((__vector_size__ (64),
+				      __aligned__ (1)));
+typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
+					   __aligned__ (1)));
+
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components.  */
 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
@@ -405,6 +411,35 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
 						   (__mmask8) __U);
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi32 (void const *__P)
+{
+  __v16si_u __X = *(__v16si_u *) __P;
+  return (__m512i) __X;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi64 (void const *__P)
+{
+  return *(__v8di_u *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+  *(__v16si_u *) __P = (__v16si_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+  *(__v8di_u *) __P = (__v8di_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_store_pd (void *__P, __m512d __A)
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 3eaf817f898..06bcc50b6b4 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A)
   *(__m256i *) __P = __A;
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi32 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi32 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi64 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi64 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_epi64 (void *__P, __m128i __A)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
new file mode 100644
index 00000000000..8f07b6e986a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+int *a,*b;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi32 (a);
+  _mm512_storeu_epi32 (b, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
new file mode 100644
index 00000000000..c2805116eae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+long long *a;
+volatile __m512i zz;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi64 (a);
+  _mm512_storeu_epi64 (a, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c
new file mode 100644
index 00000000000..72a30dc8da2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m128i xx;
+volatile __m128i xx1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm_storeu_epi32 (a, xx);
+  _mm_storeu_epi64 (b, xx1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c
new file mode 100644
index 00000000000..b1980e65140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m256i yy;
+volatile __m256i yy1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm256_storeu_epi32 (a, yy);
+  _mm256_storeu_epi64 (b, yy1);
+}
-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Add AVX512 unaligned intrinsics
  2019-07-09 22:03 [PATCH] i386: Add AVX512 unaligned intrinsics Sunil Pandey
@ 2019-07-10  9:03 ` Uros Bizjak
  2019-07-10 19:20   ` Sunil Pandey
  0 siblings, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2019-07-10  9:03 UTC (permalink / raw)
  To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu

On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
>
> __m512i _mm512_loadu_epi32( void * sa);
> __m512i _mm512_loadu_epi64( void * sa);
> void _mm512_storeu_epi32(void * d, __m512i a);
> void _mm256_storeu_epi32(void * d, __m256i a);
> void _mm_storeu_epi32(void * d, __m128i a);
> void _mm512_storeu_epi64(void * d, __m512i a);
> void _mm256_storeu_epi64(void * d, __m256i a);
> void _mm_storeu_epi64(void * d, __m128i a);
>
> Tested on x86-64.
>
> OK for trunk?
>
> --Sunil Pandey
>
>
> gcc/
>
>         PR target/90980
>         * config/i386/avx512fintrin.h (__v16si_u): New data type
>         (__v8di_u): Likewise
>         (_mm512_loadu_epi32): New.
>         (_mm512_loadu_epi64): Likewise.
>         (_mm512_storeu_epi32): Likewise.
>         (_mm512_storeu_epi64): Likewise.
>         * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
>         (_mm256_storeu_epi32): Likewise.
>         (_mm_storeu_epi64): Likewise.
>         (_mm256_storeu_epi64): Likewise.
>
> gcc/testsuite/
>
>         PR target/90980
>         * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
>         * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
>         * gcc.target/i386/pr90980-1.c: Likewise.
>         * gcc.target/i386/pr90980-2.c: Likewise.

+/* Internal data types for implementing unaligned version of intrinsics.  */
+typedef int __v16si_u __attribute__ ((__vector_size__ (64),
+      __aligned__ (1)));
+typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
+   __aligned__ (1)));

You should define only one generic __m512i_u type, something like:

typedef long long __m512i_u __attribute__ ((__vector_size__ (64),
__may_alias__, __aligned__ (1)));

Please see avxintrin.h for how __m256i_u is defined and used.

Uros.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Add AVX512 unaligned intrinsics
  2019-07-10  9:03 ` Uros Bizjak
@ 2019-07-10 19:20   ` Sunil Pandey
  2019-07-10 19:30     ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Sunil Pandey @ 2019-07-10 19:20 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, H. J. Lu

[-- Attachment #1: Type: text/plain, Size: 3123 bytes --]

Thanks, Uros. I incorporated the suggested changes in the attached patch.

--Sunil Pandey

    i386: Add AVX512 unaligned intrinsics

    __m512i _mm512_loadu_epi32( void * sa);
    __m512i _mm512_loadu_epi64( void * sa);
    void _mm512_storeu_epi32(void * d, __m512i a);
    void _mm256_storeu_epi32(void * d, __m256i a);
    void _mm_storeu_epi32(void * d, __m128i a);
    void _mm512_storeu_epi64(void * d, __m512i a);
    void _mm256_storeu_epi64(void * d, __m256i a);
    void _mm_storeu_epi64(void * d, __m128i a);

    Tested on x86-64.

    gcc/

            PR target/90980
            * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New.
            (_mm512_loadu_epi64): Likewise.
            (_mm512_storeu_epi32): Likewise.
            (_mm512_storeu_epi64): Likewise.
            * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
            (_mm256_storeu_epi32): Likewise.
            (_mm_storeu_epi64): Likewise.
            (_mm256_storeu_epi64): Likewise.

    gcc/testsuite/

            PR target/90980
            * gcc.target/i386/pr90980-1.c: New test.
            * gcc.target/i386/pr90980-2.c: Likewise.
            * gcc.target/i386/pr90980-3.c: Likewise.

On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> >
> > __m512i _mm512_loadu_epi32( void * sa);
> > __m512i _mm512_loadu_epi64( void * sa);
> > void _mm512_storeu_epi32(void * d, __m512i a);
> > void _mm256_storeu_epi32(void * d, __m256i a);
> > void _mm_storeu_epi32(void * d, __m128i a);
> > void _mm512_storeu_epi64(void * d, __m512i a);
> > void _mm256_storeu_epi64(void * d, __m256i a);
> > void _mm_storeu_epi64(void * d, __m128i a);
> >
> > Tested on x86-64.
> >
> > OK for trunk?
> >
> > --Sunil Pandey
> >
> >
> > gcc/
> >
> >         PR target/90980
> >         * config/i386/avx512fintrin.h (__v16si_u): New data type
> >         (__v8di_u): Likewise
> >         (_mm512_loadu_epi32): New.
> >         (_mm512_loadu_epi64): Likewise.
> >         (_mm512_storeu_epi32): Likewise.
> >         (_mm512_storeu_epi64): Likewise.
> >         * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> >         (_mm256_storeu_epi32): Likewise.
> >         (_mm_storeu_epi64): Likewise.
> >         (_mm256_storeu_epi64): Likewise.
> >
> > gcc/testsuite/
> >
> >         PR target/90980
> >         * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
> >         * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
> >         * gcc.target/i386/pr90980-1.c: Likewise.
> >         * gcc.target/i386/pr90980-2.c: Likewise.
>
> +/* Internal data types for implementing unaligned version of intrinsics.  */
> +typedef int __v16si_u __attribute__ ((__vector_size__ (64),
> +      __aligned__ (1)));
> +typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
> +   __aligned__ (1)));
>
> You should define only one generic __m512i_u type, something like:
>
> typedef long long __m512i_u __attribute__ ((__vector_size__ (64),
> __may_alias__, __aligned__ (1)));
>
> Please see avxintrin.h how __m256i_u is defined and used.
>
> Uros.

[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --]
[-- Type: application/octet-stream, Size: 6129 bytes --]

From 7a0a4bb7c4d4115acf559094a7317d8b3c0ced06 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp1@gmail.com>
Date: Mon, 8 Jul 2019 12:36:02 -0700
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics

__m512i _mm512_loadu_epi32( void * sa);
__m512i _mm512_loadu_epi64( void * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);
void _mm512_storeu_epi64(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);

Tested on x86-64.

gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New.
        (_mm512_loadu_epi64): Likewise.
        (_mm512_storeu_epi32): Likewise.
        (_mm512_storeu_epi64): Likewise.
        * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi64): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/pr90980-1.c: New test.
        * gcc.target/i386/pr90980-2.c: Likewise.
        * gcc.target/i386/pr90980-3.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h           | 28 +++++++++++++++++++++++
 gcc/config/i386/avx512vlintrin.h          | 28 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-3.c | 20 ++++++++++++++++
 5 files changed, 110 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-3.c

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e35eedb9268..71a7dc5a00c 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -405,6 +405,34 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
 						   (__mmask8) __U);
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi32 (void const *__P)
+{
+  return *(__m512i_u *) __P;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi64 (void const *__P)
+{
+  return *(__m512i_u *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+  *(__m512i_u *) __P = (__m512i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+  *(__m512i_u *) __P = (__m512i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_store_pd (void *__P, __m512d __A)
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 3eaf817f898..06bcc50b6b4 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A)
   *(__m256i *) __P = __A;
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi32 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi32 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi64 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi64 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_epi64 (void *__P, __m128i __A)
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c
new file mode 100644
index 00000000000..72a30dc8da2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m128i xx;
+volatile __m128i xx1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm_storeu_epi32 (a, xx);
+  _mm_storeu_epi64 (b, xx1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c
new file mode 100644
index 00000000000..b1980e65140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m256i yy;
+volatile __m256i yy1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm256_storeu_epi32 (a, yy);
+  _mm256_storeu_epi64 (b, yy1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-3.c b/gcc/testsuite/gcc.target/i386/pr90980-3.c
new file mode 100644
index 00000000000..d839ee011aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m512i zz;
+volatile __m512i zz1;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi32 (a);
+  _mm512_storeu_epi32 (a, zz);
+  zz1 = _mm512_loadu_epi64 (b);
+  _mm512_storeu_epi64 (b, zz1);
+}
-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Add AVX512 unaligned intrinsics
  2019-07-10 19:20   ` Sunil Pandey
@ 2019-07-10 19:30     ` Uros Bizjak
  2019-07-11 17:04       ` Sunil Pandey
  0 siblings, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2019-07-10 19:30 UTC (permalink / raw)
  To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu

On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
>
> Thanks Uros. I incorporated suggested changes in attached patch.
>
> --Sunil Pandey
>
>     i386: Add AVX512 unaligned intrinsics
>
>     __m512i _mm512_loadu_epi32( void * sa);
>     __m512i _mm512_loadu_epi64( void * sa);
>     void _mm512_storeu_epi32(void * d, __m512i a);
>     void _mm256_storeu_epi32(void * d, __m256i a);
>     void _mm_storeu_epi32(void * d, __m128i a);
>     void _mm512_storeu_epi64(void * d, __m512i a);
>     void _mm256_storeu_epi64(void * d, __m256i a);
>     void _mm_storeu_epi64(void * d, __m128i a);
>
>     Tested on x86-64.
>
>     gcc/
>
>             PR target/90980
>             * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New.
>             (_mm512_loadu_epi64): Likewise.
>             (_mm512_storeu_epi32): Likewise.
>             (_mm512_storeu_epi64): Likewise.
>             * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
>             (_mm256_storeu_epi32): Likewise.
>             (_mm_storeu_epi64): Likewise.
>             (_mm256_storeu_epi64): Likewise.
>
>     gcc/testsuite/
>
>             PR target/90980
>             * gcc.target/i386/pr90980-1.c: New test.
>             * gcc.target/i386/pr90980-2.c: Likewise.
>             * gcc.target/i386/pr90980-3.c: Likewise.

Looks good, but please put the new intrinsics next to the existing ones,
so that we will have e.g.:

_mm512_loadu_epi32
_mm512_mask_loadu_epi32
_mm512_maskz_loadu_epi32

and similarly for the other loads and stores.

Uros.

>
> On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> > >
> > > __m512i _mm512_loadu_epi32( void * sa);
> > > __m512i _mm512_loadu_epi64( void * sa);
> > > void _mm512_storeu_epi32(void * d, __m512i a);
> > > void _mm256_storeu_epi32(void * d, __m256i a);
> > > void _mm_storeu_epi32(void * d, __m128i a);
> > > void _mm512_storeu_epi64(void * d, __m512i a);
> > > void _mm256_storeu_epi64(void * d, __m256i a);
> > > void _mm_storeu_epi64(void * d, __m128i a);
> > >
> > > Tested on x86-64.
> > >
> > > OK for trunk?
> > >
> > > --Sunil Pandey
> > >
> > >
> > > gcc/
> > >
> > >         PR target/90980
> > >         * config/i386/avx512fintrin.h (__v16si_u): New data type
> > >         (__v8di_u): Likewise
> > >         (_mm512_loadu_epi32): New.
> > >         (_mm512_loadu_epi64): Likewise.
> > >         (_mm512_storeu_epi32): Likewise.
> > >         (_mm512_storeu_epi64): Likewise.
> > >         * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> > >         (_mm256_storeu_epi32): Likewise.
> > >         (_mm_storeu_epi64): Likewise.
> > >         (_mm256_storeu_epi64): Likewise.
> > >
> > > gcc/testsuite/
> > >
> > >         PR target/90980
> > >         * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
> > >         * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
> > >         * gcc.target/i386/pr90980-1.c: Likewise.
> > >         * gcc.target/i386/pr90980-2.c: Likewise.
> >
> > +/* Internal data types for implementing unaligned version of intrinsics.  */
> > +typedef int __v16si_u __attribute__ ((__vector_size__ (64),
> > +      __aligned__ (1)));
> > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
> > +   __aligned__ (1)));
> >
> > You should define only one generic __m512i_u type, something like:
> >
> > typedef long long __m512i_u __attribute__ ((__vector_size__ (64),
> > __may_alias__, __aligned__ (1)));
> >
> > Please see avxintrin.h how __m256i_u is defined and used.
> >
> > Uros.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Add AVX512 unaligned intrinsics
  2019-07-10 19:30     ` Uros Bizjak
@ 2019-07-11 17:04       ` Sunil Pandey
  2019-07-11 18:47         ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Sunil Pandey @ 2019-07-11 17:04 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, H. J. Lu

[-- Attachment #1: Type: text/plain, Size: 4900 bytes --]

Fixed.

--Sunil Pandey

i386: Add AVX512 unaligned intrinsics

__m512i _mm512_loadu_epi64( void * sa);
void _mm512_storeu_epi64(void * d, __m512i a);
__m512i _mm512_loadu_epi32( void * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);

Tested on x86-64.

gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New.
        (_mm512_storeu_epi64): Likewise.
        (_mm512_loadu_epi32): Likewise.
        (_mm512_storeu_epi32): Likewise.
        * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi32): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/pr90980-1.c: New test.
        * gcc.target/i386/pr90980-2.c: Likewise.
        * gcc.target/i386/pr90980-3.c: Likewise.

On Wed, Jul 10, 2019 at 12:20 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> >
> > Thanks Uros. I incorporated suggested changes in attached patch.
> >
> > --Sunil Pandey
> >
> >     i386: Add AVX512 unaligned intrinsics
> >
> >     __m512i _mm512_loadu_epi32( void * sa);
> >     __m512i _mm512_loadu_epi64( void * sa);
> >     void _mm512_storeu_epi32(void * d, __m512i a);
> >     void _mm256_storeu_epi32(void * d, __m256i a);
> >     void _mm_storeu_epi32(void * d, __m128i a);
> >     void _mm512_storeu_epi64(void * d, __m512i a);
> >     void _mm256_storeu_epi64(void * d, __m256i a);
> >     void _mm_storeu_epi64(void * d, __m128i a);
> >
> >     Tested on x86-64.
> >
> >     gcc/
> >
> >             PR target/90980
> >             * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New.
> >             (_mm512_loadu_epi64): Likewise.
> >             (_mm512_storeu_epi32): Likewise.
> >             (_mm512_storeu_epi64): Likewise.
> >             * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> >             (_mm256_storeu_epi32): Likewise.
> >             (_mm_storeu_epi64): Likewise.
> >             (_mm256_storeu_epi64): Likewise.
> >
> >     gcc/testsuite/
> >
> >             PR target/90980
> >             * gcc.target/i386/pr90980-1.c: New test.
> >             * gcc.target/i386/pr90980-2.c: Likewise.
> >             * gcc.target/i386/pr90980-3.c: Likewise.
>
> Looks good, but please put new intrinsics nearby existing intrinsics,
> so we will have e.g.:
>
> _mm512_loadu_epi32
> _mm512_mask_loadu_epi32
> _mm512_maskz_loadu_epi32
>
> and in similar way for other loads and stores.
>
> Uros.
>
> >
> > On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> > > >
> > > > __m512i _mm512_loadu_epi32( void * sa);
> > > > __m512i _mm512_loadu_epi64( void * sa);
> > > > void _mm512_storeu_epi32(void * d, __m512i a);
> > > > void _mm256_storeu_epi32(void * d, __m256i a);
> > > > void _mm_storeu_epi32(void * d, __m128i a);
> > > > void _mm512_storeu_epi64(void * d, __m512i a);
> > > > void _mm256_storeu_epi64(void * d, __m256i a);
> > > > void _mm_storeu_epi64(void * d, __m128i a);
> > > >
> > > > Tested on x86-64.
> > > >
> > > > OK for trunk?
> > > >
> > > > --Sunil Pandey
> > > >
> > > >
> > > > gcc/
> > > >
> > > >         PR target/90980
> > > >         * config/i386/avx512fintrin.h (__v16si_u): New data type
> > > >         (__v8di_u): Likewise
> > > >         (_mm512_loadu_epi32): New.
> > > >         (_mm512_loadu_epi64): Likewise.
> > > >         (_mm512_storeu_epi32): Likewise.
> > > >         (_mm512_storeu_epi64): Likewise.
> > > >         * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> > > >         (_mm256_storeu_epi32): Likewise.
> > > >         (_mm_storeu_epi64): Likewise.
> > > >         (_mm256_storeu_epi64): Likewise.
> > > >
> > > > gcc/testsuite/
> > > >
> > > >         PR target/90980
> > > >         * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
> > > >         * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
> > > >         * gcc.target/i386/pr90980-1.c: Likewise.
> > > >         * gcc.target/i386/pr90980-2.c: Likewise.
> > >
> > > +/* Internal data types for implementing unaligned version of intrinsics.  */
> > > +typedef int __v16si_u __attribute__ ((__vector_size__ (64),
> > > +      __aligned__ (1)));
> > > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
> > > +   __aligned__ (1)));
> > >
> > > You should define only one generic __m512i_u type, something like:
> > >
> > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64),
> > > __may_alias__, __aligned__ (1)));
> > >
> > > Please see avxintrin.h how __m256i_u is defined and used.
> > >
> > > Uros.

[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --]
[-- Type: application/octet-stream, Size: 7836 bytes --]

From b5d08e427ed1178c9159a3b20b921a7663206bdd Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp1@gmail.com>
Date: Mon, 8 Jul 2019 12:36:02 -0700
Subject: [PATCH] i386: Add AVX512 unaligned intrinsics

__m512i _mm512_loadu_epi64( void * sa);
void _mm512_storeu_epi64(void * d, __m512i a);
__m512i _mm512_loadu_epi32( void * sa);
void _mm512_storeu_epi32(void * d, __m512i a);
void _mm256_storeu_epi64(void * d, __m256i a);
void _mm_storeu_epi64(void * d, __m128i a);
void _mm256_storeu_epi32(void * d, __m256i a);
void _mm_storeu_epi32(void * d, __m128i a);

Tested on x86-64.

gcc/

        PR target/90980
        * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New.
        (_mm512_storeu_epi64): Likewise.
        (_mm512_loadu_epi32): Likewise.
        (_mm512_storeu_epi32): Likewise.
        * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New.
        (_mm_storeu_epi64): Likewise.
        (_mm256_storeu_epi32): Likewise.
        (_mm_storeu_epi32): Likewise.

gcc/testsuite/

        PR target/90980
        * gcc.target/i386/pr90980-1.c: New test.
        * gcc.target/i386/pr90980-2.c: Likewise.
        * gcc.target/i386/pr90980-3.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h           | 28 +++++++++++++++++++++++
 gcc/config/i386/avx512vlintrin.h          | 28 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90980-3.c | 20 ++++++++++++++++
 5 files changed, 110 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-3.c

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e35eedb9268..454fd3d247d 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -6350,6 +6350,13 @@ _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
   __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi64 (void const *__P)
+{
+  return *(__m512i_u *) __P;
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
@@ -6369,6 +6376,13 @@ _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 						     (__mmask8) __U);
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+  *(__m512i_u *) __P = (__m512i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
@@ -6384,6 +6398,13 @@ _mm512_loadu_si512 (void const *__P)
   return *(__m512i_u *)__P;
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi32 (void const *__P)
+{
+  return *(__m512i_u *) __P;
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
@@ -6410,6 +6431,13 @@ _mm512_storeu_si512 (void *__P, __m512i __A)
   *(__m512i_u *)__P = __A;
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+  *(__m512i_u *) __P = (__m512i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 3eaf817f898..bd8746ddc94 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -755,6 +755,13 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 						     (__mmask8) __U);
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi64 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
@@ -764,6 +771,13 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
 				     (__mmask8) __U);
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi64 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
@@ -811,6 +825,13 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 						     (__mmask8) __U);
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi32 (void *__P, __m256i __A)
+{
+  *(__m256i_u *) __P = (__m256i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
@@ -820,6 +841,13 @@ _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
 				     (__mmask8) __U);
 }
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi32 (void *__P, __m128i __A)
+{
+  *(__m128i_u *) __P = (__m128i_u) __A;
+}
+
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c
new file mode 100644
index 00000000000..72a30dc8da2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m128i xx;
+volatile __m128i xx1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm_storeu_epi32 (a, xx);
+  _mm_storeu_epi64 (b, xx1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c
new file mode 100644
index 00000000000..b1980e65140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m256i yy;
+volatile __m256i yy1;
+
+void extern
+avx512vl_test (void)
+{
+  _mm256_storeu_epi32 (a, yy);
+  _mm256_storeu_epi64 (b, yy1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90980-3.c b/gcc/testsuite/gcc.target/i386/pr90980-3.c
new file mode 100644
index 00000000000..d839ee011aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90980-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+int *a;
+long long *b;
+volatile __m512i zz;
+volatile __m512i zz1;
+
+void extern
+avx512f_test (void)
+{
+  zz = _mm512_loadu_epi32 (a);
+  _mm512_storeu_epi32 (a, zz);
+  zz1 = _mm512_loadu_epi64 (b);
+  _mm512_storeu_epi64 (b, zz1);
+}
-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Add AVX512 unaligned intrinsics
  2019-07-11 17:04       ` Sunil Pandey
@ 2019-07-11 18:47         ` Uros Bizjak
  0 siblings, 0 replies; 6+ messages in thread
From: Uros Bizjak @ 2019-07-11 18:47 UTC (permalink / raw)
  To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu

On Thu, Jul 11, 2019 at 6:54 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
>
> Fixed.
>
> --Sunil Pandey
>
> i386: Add AVX512 unaligned intrinsics
>
> __m512i _mm512_loadu_epi64( void * sa);
> void _mm512_storeu_epi64(void * d, __m512i a);
> __m512i _mm512_loadu_epi32( void * sa);
> void _mm512_storeu_epi32(void * d, __m512i a);
> void _mm256_storeu_epi64(void * d, __m256i a);
> void _mm_storeu_epi64(void * d, __m128i a);
> void _mm256_storeu_epi32(void * d, __m256i a);
> void _mm_storeu_epi32(void * d, __m128i a);
>
> Tested on x86-64.
>
> gcc/
>
>         PR target/90980
>         * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New.
>         (_mm512_storeu_epi64): Likewise.
>         (_mm512_loadu_epi32): Likewise.
>         (_mm512_storeu_epi32): Likewise.
>         * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New.
>         (_mm_storeu_epi64): Likewise.
>         (_mm256_storeu_epi32): Likewise.
>         (_mm_storeu_epi32): Likewise.
>
> gcc/testsuite/
>
>         PR target/90980
>         * gcc.target/i386/pr90980-1.c: New test.
>         * gcc.target/i386/pr90980-2.c: Likewise.
>         * gcc.target/i386/pr90980-3.c: Likewise.

OK.

Thanks,
Uros.

> On Wed, Jul 10, 2019 at 12:20 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> > >
> > > Thanks Uros. I incorporated suggested changes in attached patch.
> > >
> > > --Sunil Pandey
> > >
> > >     i386: Add AVX512 unaligned intrinsics
> > >
> > >     __m512i _mm512_loadu_epi32( void * sa);
> > >     __m512i _mm512_loadu_epi64( void * sa);
> > >     void _mm512_storeu_epi32(void * d, __m512i a);
> > >     void _mm256_storeu_epi32(void * d, __m256i a);
> > >     void _mm_storeu_epi32(void * d, __m128i a);
> > >     void _mm512_storeu_epi64(void * d, __m512i a);
> > >     void _mm256_storeu_epi64(void * d, __m256i a);
> > >     void _mm_storeu_epi64(void * d, __m128i a);
> > >
> > >     Tested on x86-64.
> > >
> > >     gcc/
> > >
> > >             PR target/90980
> > >             * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New.
> > >             (_mm512_loadu_epi64): Likewise.
> > >             (_mm512_storeu_epi32): Likewise.
> > >             (_mm512_storeu_epi64): Likewise.
> > >             * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> > >             (_mm256_storeu_epi32): Likewise.
> > >             (_mm_storeu_epi64): Likewise.
> > >             (_mm256_storeu_epi64): Likewise.
> > >
> > >     gcc/testsuite/
> > >
> > >             PR target/90980
> > >             * gcc.target/i386/pr90980-1.c: New test.
> > >             * gcc.target/i386/pr90980-2.c: Likewise.
> > >             * gcc.target/i386/pr90980-3.c: Likewise.
> >
> > Looks good, but please put the new intrinsics next to the existing ones,
> > so that we will have e.g.:
> >
> > _mm512_loadu_epi32
> > _mm512_mask_loadu_epi32
> > _mm512_maskz_loadu_epi32
> >
> > and in a similar way for the other loads and stores.
> >
> > Uros.
> >
> > >
> > > On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote:
> > > > >
> > > > > __m512i _mm512_loadu_epi32( void * sa);
> > > > > __m512i _mm512_loadu_epi64( void * sa);
> > > > > void _mm512_storeu_epi32(void * d, __m512i a);
> > > > > void _mm256_storeu_epi32(void * d, __m256i a);
> > > > > void _mm_storeu_epi32(void * d, __m128i a);
> > > > > void _mm512_storeu_epi64(void * d, __m512i a);
> > > > > void _mm256_storeu_epi64(void * d, __m256i a);
> > > > > void _mm_storeu_epi64(void * d, __m128i a);
> > > > >
> > > > > Tested on x86-64.
> > > > >
> > > > > OK for trunk?
> > > > >
> > > > > --Sunil Pandey
> > > > >
> > > > >
> > > > > gcc/
> > > > >
> > > > >         PR target/90980
> > > > >         * config/i386/avx512fintrin.h (__v16si_u): New data type
> > > > >         (__v8di_u): Likewise
> > > > >         (_mm512_loadu_epi32): New.
> > > > >         (_mm512_loadu_epi64): Likewise.
> > > > >         (_mm512_storeu_epi32): Likewise.
> > > > >         (_mm512_storeu_epi64): Likewise.
> > > > >         * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New.
> > > > >         (_mm256_storeu_epi32): Likewise.
> > > > >         (_mm_storeu_epi64): Likewise.
> > > > >         (_mm256_storeu_epi64): Likewise.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > >         PR target/90980
> > > > >         * gcc.target/i386/avx512f-vmovdqu32-3.c: New test.
> > > > >         * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise.
> > > > >         * gcc.target/i386/pr90980-1.c: Likewise.
> > > > >         * gcc.target/i386/pr90980-2.c: Likewise.
> > > >
> > > > +/* Internal data types for implementing unaligned version of intrinsics.  */
> > > > +typedef int __v16si_u __attribute__ ((__vector_size__ (64),
> > > > +      __aligned__ (1)));
> > > > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64),
> > > > +   __aligned__ (1)));
> > > >
> > > > You should define only one generic __m512i_u type, something like:
> > > >
> > > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64),
> > > > __may_alias__, __aligned__ (1)));
> > > >
> > > > Please see how __m256i_u is defined and used in avxintrin.h.
> > > >
> > > > Uros.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-11 18:43 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-09 22:03 [PATCH] i386: Add AVX512 unaligned intrinsics Sunil Pandey
2019-07-10  9:03 ` Uros Bizjak
2019-07-10 19:20   ` Sunil Pandey
2019-07-10 19:30     ` Uros Bizjak
2019-07-11 17:04       ` Sunil Pandey
2019-07-11 18:47         ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).