* [PATCH] i386: Add AVX512 unaligned intrinsics @ 2019-07-09 22:03 Sunil Pandey 2019-07-10 9:03 ` Uros Bizjak 0 siblings, 1 reply; 6+ messages in thread From: Sunil Pandey @ 2019-07-09 22:03 UTC (permalink / raw) To: gcc-patches; +Cc: hjl.tools, ubizjak [-- Attachment #1: Type: text/plain, Size: 1130 bytes --] __m512i _mm512_loadu_epi32( void * sa); __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); void _mm512_storeu_epi64(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); Tested on x86-64. OK for trunk? --Sunil Pandey gcc/ PR target/90980 * config/i386/avx512fintrin.h (__v16si_u): New data type. (__v8di_u): Likewise. (_mm512_loadu_epi32): New. (_mm512_loadu_epi64): Likewise. (_mm512_storeu_epi32): Likewise. (_mm512_storeu_epi64): Likewise. * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi64): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. * gcc.target/i386/pr90980-1.c: Likewise. * gcc.target/i386/pr90980-2.c: Likewise. 
[-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --] [-- Type: application/octet-stream, Size: 7793 bytes --] From 6b81933857d41501efde56bef16c40fea3201899 Mon Sep 17 00:00:00 2001 From: Sunil K Pandey <skpgkp1@gmail.com> Date: Mon, 8 Jul 2019 12:36:02 -0700 Subject: [PATCH] i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi32( void * sa); __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); void _mm512_storeu_epi64(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); Tested on x86-64. gcc/ PR target/90980 * config/i386/avx512fintrin.h (__v16si_u): New data type (__v8di_u): Likewise (_mm512_loadu_epi32): New. (_mm512_loadu_epi64): Likewise. (_mm512_storeu_epi32): Likewise. (_mm512_storeu_epi64): Likewise. * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi64): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. * gcc.target/i386/pr90980-1.c: Likewise. * gcc.target/i386/pr90980-2.c: Likewise. 
--- gcc/config/i386/avx512fintrin.h | 35 +++++++++++++++++++ gcc/config/i386/avx512vlintrin.h | 28 +++++++++++++++ .../gcc.target/i386/avx512f-vmovdqu32-3.c | 16 +++++++++ .../gcc.target/i386/avx512f-vmovdqu64-3.c | 16 +++++++++ gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 +++++++++ gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 +++++++++ 6 files changed, 129 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e35eedb9268..9a4d657c108 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -46,6 +46,12 @@ typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64))); typedef char __v64qi __attribute__ ((__vector_size__ (64))); typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64))); +/* Internal data types for implementing unaligned version of intrinsics. */ +typedef int __v16si_u __attribute__ ((__vector_size__ (64), + __aligned__ (1))); +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), + __aligned__ (1))); + /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. 
*/ typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); @@ -405,6 +411,35 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi32 (void const *__P) +{ + __v16si_u __X = *(__v16si_u *) __P; + return (__m512i) __X; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi64 (void const *__P) +{ + return *(__v8di_u *) __P; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi32 (void *__P, __m512i __A) +{ + *(__v16si_u *) __P = (__v16si_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi64 (void *__P, __m512i __A) +{ + *(__v8di_u *) __P = (__v8di_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_store_pd (void *__P, __m512d __A) diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 3eaf817f898..06bcc50b6b4 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A) *(__m256i *) __P = __A; } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi32 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi32 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi64 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi64 (void 
*__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_epi64 (void *__P, __m128i __A) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c new file mode 100644 index 00000000000..8f07b6e986a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +int *a,*b; +volatile __m512i zz; + +void extern +avx512f_test (void) +{ + zz = _mm512_loadu_epi32 (a); + _mm512_storeu_epi32 (b, zz); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c new file mode 100644 index 00000000000..c2805116eae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +long long *a; +volatile __m512i zz; + +void extern +avx512f_test (void) +{ + zz = _mm512_loadu_epi64 (a); + _mm512_storeu_epi64 (a, zz); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c new file mode 100644 index 00000000000..72a30dc8da2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { 
scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m128i xx; +volatile __m128i xx1; + +void extern +avx512vl_test (void) +{ + _mm_storeu_epi32 (a, xx); + _mm_storeu_epi64 (b, xx1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c new file mode 100644 index 00000000000..b1980e65140 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m256i yy; +volatile __m256i yy1; + +void extern +avx512vl_test (void) +{ + _mm256_storeu_epi32 (a, yy); + _mm256_storeu_epi64 (b, yy1); +} -- 2.20.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Add AVX512 unaligned intrinsics 2019-07-09 22:03 [PATCH] i386: Add AVX512 unaligned intrinsics Sunil Pandey @ 2019-07-10 9:03 ` Uros Bizjak 2019-07-10 19:20 ` Sunil Pandey 0 siblings, 1 reply; 6+ messages in thread From: Uros Bizjak @ 2019-07-10 9:03 UTC (permalink / raw) To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > __m512i _mm512_loadu_epi32( void * sa); > __m512i _mm512_loadu_epi64( void * sa); > void _mm512_storeu_epi32(void * d, __m512i a); > void _mm256_storeu_epi32(void * d, __m256i a); > void _mm_storeu_epi32(void * d, __m128i a); > void _mm512_storeu_epi64(void * d, __m512i a); > void _mm256_storeu_epi64(void * d, __m256i a); > void _mm_storeu_epi64(void * d, __m128i a); > > Tested on x86-64. > > OK for trunk? > > --Sunil Pandey > > > gcc/ > > PR target/90980 > * config/i386/avx512fintrin.h (__v16si_u): New data type > (__v8di_u): Likewise > (_mm512_loadu_epi32): New. > (_mm512_loadu_epi64): Likewise. > (_mm512_storeu_epi32): Likewise. > (_mm512_storeu_epi64): Likewise. > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > (_mm256_storeu_epi32): Likewise. > (_mm_storeu_epi64): Likewise. > (_mm256_storeu_epi64): Likewise. > > gcc/testsuite/ > > PR target/90980 > * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. > * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. > * gcc.target/i386/pr90980-1.c: Likewise. > * gcc.target/i386/pr90980-2.c: Likewise. +/* Internal data types for implementing unaligned version of intrinsics. */ +typedef int __v16si_u __attribute__ ((__vector_size__ (64), + __aligned__ (1))); +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), + __aligned__ (1))); You should define only one generic __m512i_u type, something like: typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); Please see avxintrin.h for how __m256i_u is defined and used. Uros. 
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Add AVX512 unaligned intrinsics 2019-07-10 9:03 ` Uros Bizjak @ 2019-07-10 19:20 ` Sunil Pandey 2019-07-10 19:30 ` Uros Bizjak 0 siblings, 1 reply; 6+ messages in thread From: Sunil Pandey @ 2019-07-10 19:20 UTC (permalink / raw) To: Uros Bizjak; +Cc: gcc-patches, H. J. Lu [-- Attachment #1: Type: text/plain, Size: 3123 bytes --] Thanks Uros. I incorporated suggested changes in attached patch. --Sunil Pandey i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi32( void * sa); __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); void _mm512_storeu_epi64(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); Tested on x86-64. gcc/ PR target/90980 * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New. (_mm512_loadu_epi64): Likewise. (_mm512_storeu_epi32): Likewise. (_mm512_storeu_epi64): Likewise. * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi64): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/pr90980-1.c: New test. * gcc.target/i386/pr90980-2.c: Likewise. * gcc.target/i386/pr90980-3.c: Likewise. On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > __m512i _mm512_loadu_epi32( void * sa); > > __m512i _mm512_loadu_epi64( void * sa); > > void _mm512_storeu_epi32(void * d, __m512i a); > > void _mm256_storeu_epi32(void * d, __m256i a); > > void _mm_storeu_epi32(void * d, __m128i a); > > void _mm512_storeu_epi64(void * d, __m512i a); > > void _mm256_storeu_epi64(void * d, __m256i a); > > void _mm_storeu_epi64(void * d, __m128i a); > > > > Tested on x86-64. > > > > OK for trunk? 
> > > > --Sunil Pandey > > > > > > gcc/ > > > > PR target/90980 > > * config/i386/avx512fintrin.h (__v16si_u): New data type > > (__v8di_u): Likewise > > (_mm512_loadu_epi32): New. > > (_mm512_loadu_epi64): Likewise. > > (_mm512_storeu_epi32): Likewise. > > (_mm512_storeu_epi64): Likewise. > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > (_mm256_storeu_epi32): Likewise. > > (_mm_storeu_epi64): Likewise. > > (_mm256_storeu_epi64): Likewise. > > > > gcc/testsuite/ > > > > PR target/90980 > > * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. > > * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. > > * gcc.target/i386/pr90980-1.c: Likewise. > > * gcc.target/i386/pr90980-2.c: Likewise. > > +/* Internal data types for implementing unaligned version of intrinsics. */ > +typedef int __v16si_u __attribute__ ((__vector_size__ (64), > + __aligned__ (1))); > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), > + __aligned__ (1))); > > You should define only one generic __m512i_u type, something like: > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64), > __may_alias__, __aligned__ (1))); > > Please see avxintrin.h how __m256i_u is defined and used. > > Uros. [-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --] [-- Type: application/octet-stream, Size: 6129 bytes --] From 7a0a4bb7c4d4115acf559094a7317d8b3c0ced06 Mon Sep 17 00:00:00 2001 From: Sunil K Pandey <skpgkp1@gmail.com> Date: Mon, 8 Jul 2019 12:36:02 -0700 Subject: [PATCH] i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi32( void * sa); __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); void _mm512_storeu_epi64(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); Tested on x86-64. gcc/ PR target/90980 * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New. 
(_mm512_loadu_epi64): Likewise. (_mm512_storeu_epi32): Likewise. (_mm512_storeu_epi64): Likewise. * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi64): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/pr90980-1.c: New test. * gcc.target/i386/pr90980-2.c: Likewise. * gcc.target/i386/pr90980-3.c: Likewise. --- gcc/config/i386/avx512fintrin.h | 28 +++++++++++++++++++++++ gcc/config/i386/avx512vlintrin.h | 28 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-3.c | 20 ++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-3.c diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e35eedb9268..71a7dc5a00c 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -405,6 +405,34 @@ _mm512_maskz_load_pd (__mmask8 __U, void const *__P) (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi32 (void const *__P) +{ + return *(__m512i_u *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi64 (void const *__P) +{ + return *(__m512i_u *) __P; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi32 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi64 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm512_store_pd (void *__P, __m512d __A) diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 3eaf817f898..06bcc50b6b4 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -598,6 +598,34 @@ _mm256_store_epi64 (void *__P, __m256i __A) *(__m256i *) __P = __A; } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi32 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi32 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi64 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi64 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_epi64 (void *__P, __m128i __A) diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c new file mode 100644 index 00000000000..72a30dc8da2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m128i xx; +volatile __m128i xx1; + +void extern +avx512vl_test (void) +{ + _mm_storeu_epi32 (a, xx); + _mm_storeu_epi64 (b, xx1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c new file mode 100644 index 
00000000000..b1980e65140 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m256i yy; +volatile __m256i yy1; + +void extern +avx512vl_test (void) +{ + _mm256_storeu_epi32 (a, yy); + _mm256_storeu_epi64 (b, yy1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-3.c b/gcc/testsuite/gcc.target/i386/pr90980-3.c new file mode 100644 index 00000000000..d839ee011aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m512i zz; +volatile __m512i zz1; + +void extern +avx512f_test (void) +{ + zz = _mm512_loadu_epi32 (a); + _mm512_storeu_epi32 (a, zz); + zz1 = _mm512_loadu_epi64 (b); + _mm512_storeu_epi64 (b, zz1); +} -- 2.20.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Add AVX512 unaligned intrinsics 2019-07-10 19:20 ` Sunil Pandey @ 2019-07-10 19:30 ` Uros Bizjak 2019-07-11 17:04 ` Sunil Pandey 0 siblings, 1 reply; 6+ messages in thread From: Uros Bizjak @ 2019-07-10 19:30 UTC (permalink / raw) To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > Thanks Uros. I incorporated suggested changes in attached patch. > > --Sunil Pandey > > i386: Add AVX512 unaligned intrinsics > > __m512i _mm512_loadu_epi32( void * sa); > __m512i _mm512_loadu_epi64( void * sa); > void _mm512_storeu_epi32(void * d, __m512i a); > void _mm256_storeu_epi32(void * d, __m256i a); > void _mm_storeu_epi32(void * d, __m128i a); > void _mm512_storeu_epi64(void * d, __m512i a); > void _mm256_storeu_epi64(void * d, __m256i a); > void _mm_storeu_epi64(void * d, __m128i a); > > Tested on x86-64. > > gcc/ > > PR target/90980 > * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New. > (_mm512_loadu_epi64): Likewise. > (_mm512_storeu_epi32): Likewise. > (_mm512_storeu_epi64): Likewise. > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > (_mm256_storeu_epi32): Likewise. > (_mm_storeu_epi64): Likewise. > (_mm256_storeu_epi64): Likewise. > > gcc/testsuite/ > > PR target/90980 > * gcc.target/i386/pr90980-1.c: New test. > * gcc.target/i386/pr90980-2.c: Likewise. > * gcc.target/i386/pr90980-3.c: Likewise. Looks good, but please put new intrinsics nearby existing intrinsics, so we will have e.g.: _mm512_loadu_epi32 _mm512_mask_loadu_epi32 _mm512_maskz_loadu_epi32 and in similar way for other loads and stores. Uros. 
> > On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > > > __m512i _mm512_loadu_epi32( void * sa); > > > __m512i _mm512_loadu_epi64( void * sa); > > > void _mm512_storeu_epi32(void * d, __m512i a); > > > void _mm256_storeu_epi32(void * d, __m256i a); > > > void _mm_storeu_epi32(void * d, __m128i a); > > > void _mm512_storeu_epi64(void * d, __m512i a); > > > void _mm256_storeu_epi64(void * d, __m256i a); > > > void _mm_storeu_epi64(void * d, __m128i a); > > > > > > Tested on x86-64. > > > > > > OK for trunk? > > > > > > --Sunil Pandey > > > > > > > > > gcc/ > > > > > > PR target/90980 > > > * config/i386/avx512fintrin.h (__v16si_u): New data type > > > (__v8di_u): Likewise > > > (_mm512_loadu_epi32): New. > > > (_mm512_loadu_epi64): Likewise. > > > (_mm512_storeu_epi32): Likewise. > > > (_mm512_storeu_epi64): Likewise. > > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > > (_mm256_storeu_epi32): Likewise. > > > (_mm_storeu_epi64): Likewise. > > > (_mm256_storeu_epi64): Likewise. > > > > > > gcc/testsuite/ > > > > > > PR target/90980 > > > * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. > > > * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. > > > * gcc.target/i386/pr90980-1.c: Likewise. > > > * gcc.target/i386/pr90980-2.c: Likewise. > > > > +/* Internal data types for implementing unaligned version of intrinsics. */ > > +typedef int __v16si_u __attribute__ ((__vector_size__ (64), > > + __aligned__ (1))); > > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), > > + __aligned__ (1))); > > > > You should define only one generic __m512i_u type, something like: > > > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64), > > __may_alias__, __aligned__ (1))); > > > > Please see avxintrin.h how __m256i_u is defined and used. > > > > Uros. ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Add AVX512 unaligned intrinsics 2019-07-10 19:30 ` Uros Bizjak @ 2019-07-11 17:04 ` Sunil Pandey 2019-07-11 18:47 ` Uros Bizjak 0 siblings, 1 reply; 6+ messages in thread From: Sunil Pandey @ 2019-07-11 17:04 UTC (permalink / raw) To: Uros Bizjak; +Cc: gcc-patches, H. J. Lu [-- Attachment #1: Type: text/plain, Size: 4900 bytes --] Fixed. --Sunil Pandey i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi64(void * d, __m512i a); __m512i _mm512_loadu_epi32( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); Tested on x86-64. gcc/ PR target/90980 * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New. (_mm512_storeu_epi64): Likewise. (_mm512_loadu_epi32): Likewise. (_mm512_storeu_epi32): Likewise. * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi32): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/pr90980-1.c: New test. * gcc.target/i386/pr90980-2.c: Likewise. * gcc.target/i386/pr90980-3.c: Likewise. On Wed, Jul 10, 2019 at 12:20 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > Thanks Uros. I incorporated suggested changes in attached patch. > > > > --Sunil Pandey > > > > i386: Add AVX512 unaligned intrinsics > > > > __m512i _mm512_loadu_epi32( void * sa); > > __m512i _mm512_loadu_epi64( void * sa); > > void _mm512_storeu_epi32(void * d, __m512i a); > > void _mm256_storeu_epi32(void * d, __m256i a); > > void _mm_storeu_epi32(void * d, __m128i a); > > void _mm512_storeu_epi64(void * d, __m512i a); > > void _mm256_storeu_epi64(void * d, __m256i a); > > void _mm_storeu_epi64(void * d, __m128i a); > > > > Tested on x86-64. 
> > > > gcc/ > > > > PR target/90980 > > * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New. > > (_mm512_loadu_epi64): Likewise. > > (_mm512_storeu_epi32): Likewise. > > (_mm512_storeu_epi64): Likewise. > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > (_mm256_storeu_epi32): Likewise. > > (_mm_storeu_epi64): Likewise. > > (_mm256_storeu_epi64): Likewise. > > > > gcc/testsuite/ > > > > PR target/90980 > > * gcc.target/i386/pr90980-1.c: New test. > > * gcc.target/i386/pr90980-2.c: Likewise. > > * gcc.target/i386/pr90980-3.c: Likewise. > > Looks good, but please put new intrinsics nearby existing intrinsics, > so we will have e.g.: > > _mm512_loadu_epi32 > _mm512_mask_loadu_epi32 > _mm512_maskz_loadu_epi32 > > and in similar way for other loads and stores. > > Uros. > > > > > On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > > > > > __m512i _mm512_loadu_epi32( void * sa); > > > > __m512i _mm512_loadu_epi64( void * sa); > > > > void _mm512_storeu_epi32(void * d, __m512i a); > > > > void _mm256_storeu_epi32(void * d, __m256i a); > > > > void _mm_storeu_epi32(void * d, __m128i a); > > > > void _mm512_storeu_epi64(void * d, __m512i a); > > > > void _mm256_storeu_epi64(void * d, __m256i a); > > > > void _mm_storeu_epi64(void * d, __m128i a); > > > > > > > > Tested on x86-64. > > > > > > > > OK for trunk? > > > > > > > > --Sunil Pandey > > > > > > > > > > > > gcc/ > > > > > > > > PR target/90980 > > > > * config/i386/avx512fintrin.h (__v16si_u): New data type > > > > (__v8di_u): Likewise > > > > (_mm512_loadu_epi32): New. > > > > (_mm512_loadu_epi64): Likewise. > > > > (_mm512_storeu_epi32): Likewise. > > > > (_mm512_storeu_epi64): Likewise. > > > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > > > (_mm256_storeu_epi32): Likewise. > > > > (_mm_storeu_epi64): Likewise. > > > > (_mm256_storeu_epi64): Likewise. 
> > > > > > > > gcc/testsuite/ > > > > > > > > PR target/90980 > > > > * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. > > > > * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. > > > > * gcc.target/i386/pr90980-1.c: Likewise. > > > > * gcc.target/i386/pr90980-2.c: Likewise. > > > > > > +/* Internal data types for implementing unaligned version of intrinsics. */ > > > +typedef int __v16si_u __attribute__ ((__vector_size__ (64), > > > + __aligned__ (1))); > > > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), > > > + __aligned__ (1))); > > > > > > You should define only one generic __m512i_u type, something like: > > > > > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64), > > > __may_alias__, __aligned__ (1))); > > > > > > Please see avxintrin.h how __m256i_u is defined and used. > > > > > > Uros. [-- Attachment #2: 0001-i386-Add-AVX512-unaligned-intrinsics.patch --] [-- Type: application/octet-stream, Size: 7836 bytes --] From b5d08e427ed1178c9159a3b20b921a7663206bdd Mon Sep 17 00:00:00 2001 From: Sunil K Pandey <skpgkp1@gmail.com> Date: Mon, 8 Jul 2019 12:36:02 -0700 Subject: [PATCH] i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi64(void * d, __m512i a); __m512i _mm512_loadu_epi32( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); Tested on x86-64. gcc/ PR target/90980 * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New. (_mm512_storeu_epi64): Likewise. (_mm512_loadu_epi32): Likewise. (_mm512_storeu_epi32): Likewise. * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi32): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/pr90980-1.c: New test. * gcc.target/i386/pr90980-2.c: Likewise. 
* gcc.target/i386/pr90980-3.c: Likewise. --- gcc/config/i386/avx512fintrin.h | 28 +++++++++++++++++++++++ gcc/config/i386/avx512vlintrin.h | 28 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-3.c | 20 ++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-3.c diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e35eedb9268..454fd3d247d 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -6350,6 +6350,13 @@ _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A) __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi64 (void const *__P) +{ + return *(__m512i_u *) __P; +} + extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) @@ -6369,6 +6376,13 @@ _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) (__mmask8) __U); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi64 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) @@ -6384,6 +6398,13 @@ _mm512_loadu_si512 (void const *__P) return *(__m512i_u *)__P; } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi32 (void const *__P) +{ + return *(__m512i_u *) __P; +} + extern __inline __m512i __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) @@ -6410,6 +6431,13 @@ _mm512_storeu_si512 (void *__P, __m512i __A) *(__m512i_u *)__P = __A; } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi32 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 3eaf817f898..bd8746ddc94 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -755,6 +755,13 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) (__mmask8) __U); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi64 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) @@ -764,6 +771,13 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi64 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) @@ -811,6 +825,13 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) (__mmask8) __U); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi32 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi32 
(void *__P, __mmask8 __U, __m256i __A) @@ -820,6 +841,13 @@ _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) (__mmask8) __U); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi32 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c new file mode 100644 index 00000000000..72a30dc8da2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m128i xx; +volatile __m128i xx1; + +void extern +avx512vl_test (void) +{ + _mm_storeu_epi32 (a, xx); + _mm_storeu_epi64 (b, xx1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c new file mode 100644 index 00000000000..b1980e65140 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m256i yy; +volatile __m256i yy1; + +void extern +avx512vl_test (void) +{ + _mm256_storeu_epi32 (a, yy); + _mm256_storeu_epi64 (b, yy1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-3.c b/gcc/testsuite/gcc.target/i386/pr90980-3.c new file mode 100644 index 00000000000..d839ee011aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { 
dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +int *a; +long long *b; +volatile __m512i zz; +volatile __m512i zz1; + +void extern +avx512f_test (void) +{ + zz = _mm512_loadu_epi32 (a); + _mm512_storeu_epi32 (a, zz); + zz1 = _mm512_loadu_epi64 (b); + _mm512_storeu_epi64 (b, zz1); +} -- 2.20.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Add AVX512 unaligned intrinsics 2019-07-11 17:04 ` Sunil Pandey @ 2019-07-11 18:47 ` Uros Bizjak 0 siblings, 0 replies; 6+ messages in thread From: Uros Bizjak @ 2019-07-11 18:47 UTC (permalink / raw) To: Sunil Pandey; +Cc: gcc-patches, H. J. Lu On Thu, Jul 11, 2019 at 6:54 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > Fixed. > > --Sunil Pandey > > i386: Add AVX512 unaligned intrinsics > > __m512i _mm512_loadu_epi64( void * sa); > void _mm512_storeu_epi64(void * d, __m512i a); > __m512i _mm512_loadu_epi32( void * sa); > void _mm512_storeu_epi32(void * d, __m512i a); > void _mm256_storeu_epi64(void * d, __m256i a); > void _mm_storeu_epi64(void * d, __m128i a); > void _mm256_storeu_epi32(void * d, __m256i a); > void _mm_storeu_epi32(void * d, __m128i a); > > Tested on x86-64. > > gcc/ > > PR target/90980 > * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New. > (_mm512_storeu_epi64): Likewise. > (_mm512_loadu_epi32): Likewise. > (_mm512_storeu_epi32): Likewise. > * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New. > (_mm_storeu_epi64): Likewise. > (_mm256_storeu_epi32): Likewise. > (_mm_storeu_epi32): Likewise. > > gcc/testsuite/ > > PR target/90980 > * gcc.target/i386/pr90980-1.c: New test. > * gcc.target/i386/pr90980-2.c: Likewise. > * gcc.target/i386/pr90980-3.c: Likewise. OK. Thanks, Uros. > On Wed, Jul 10, 2019 at 12:20 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Wed, Jul 10, 2019 at 9:11 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > > > Thanks Uros. I incorporated suggested changes in attached patch. 
> > > > > > --Sunil Pandey > > > > > > i386: Add AVX512 unaligned intrinsics > > > > > > __m512i _mm512_loadu_epi32( void * sa); > > > __m512i _mm512_loadu_epi64( void * sa); > > > void _mm512_storeu_epi32(void * d, __m512i a); > > > void _mm256_storeu_epi32(void * d, __m256i a); > > > void _mm_storeu_epi32(void * d, __m128i a); > > > void _mm512_storeu_epi64(void * d, __m512i a); > > > void _mm256_storeu_epi64(void * d, __m256i a); > > > void _mm_storeu_epi64(void * d, __m128i a); > > > > > > Tested on x86-64. > > > > > > gcc/ > > > > > > PR target/90980 > > > * config/i386/avx512fintrin.h (_mm512_loadu_epi32): New. > > > (_mm512_loadu_epi64): Likewise. > > > (_mm512_storeu_epi32): Likewise. > > > (_mm512_storeu_epi64): Likewise. > > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > > (_mm256_storeu_epi32): Likewise. > > > (_mm_storeu_epi64): Likewise. > > > (_mm256_storeu_epi64): Likewise. > > > > > > gcc/testsuite/ > > > > > > PR target/90980 > > > * gcc.target/i386/pr90980-1.c: New test. > > > * gcc.target/i386/pr90980-2.c: Likewise. > > > * gcc.target/i386/pr90980-3.c: Likewise. > > > > Looks good, but please put new intrinsics nearby existing intrinsics, > > so we will have e.g.: > > > > _mm512_loadu_epi32 > > _mm512_mask_loadu_epi32 > > _mm512_maskz_loadu_epi32 > > > > and in similar way for other loads and stores. > > > > Uros. 
> > > > > > > > On Tue, Jul 9, 2019 at 11:39 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > On Tue, Jul 9, 2019 at 11:44 PM Sunil Pandey <skpgkp1@gmail.com> wrote: > > > > > > > > > > __m512i _mm512_loadu_epi32( void * sa); > > > > > __m512i _mm512_loadu_epi64( void * sa); > > > > > void _mm512_storeu_epi32(void * d, __m512i a); > > > > > void _mm256_storeu_epi32(void * d, __m256i a); > > > > > void _mm_storeu_epi32(void * d, __m128i a); > > > > > void _mm512_storeu_epi64(void * d, __m512i a); > > > > > void _mm256_storeu_epi64(void * d, __m256i a); > > > > > void _mm_storeu_epi64(void * d, __m128i a); > > > > > > > > > > Tested on x86-64. > > > > > > > > > > OK for trunk? > > > > > > > > > > --Sunil Pandey > > > > > > > > > > > > > > > gcc/ > > > > > > > > > > PR target/90980 > > > > > * config/i386/avx512fintrin.h (__v16si_u): New data type > > > > > (__v8di_u): Likewise > > > > > (_mm512_loadu_epi32): New. > > > > > (_mm512_loadu_epi64): Likewise. > > > > > (_mm512_storeu_epi32): Likewise. > > > > > (_mm512_storeu_epi64): Likewise. > > > > > * config/i386/avx512vlintrin.h (_mm_storeu_epi32): New. > > > > > (_mm256_storeu_epi32): Likewise. > > > > > (_mm_storeu_epi64): Likewise. > > > > > (_mm256_storeu_epi64): Likewise. > > > > > > > > > > gcc/testsuite/ > > > > > > > > > > PR target/90980 > > > > > * gcc.target/i386/avx512f-vmovdqu32-3.c: New test. > > > > > * gcc.target/i386/avx512f-vmovdqu64-3.c: Likewise. > > > > > * gcc.target/i386/pr90980-1.c: Likewise. > > > > > * gcc.target/i386/pr90980-2.c: Likewise. > > > > > > > > +/* Internal data types for implementing unaligned version of intrinsics. 
*/ > > > > +typedef int __v16si_u __attribute__ ((__vector_size__ (64), > > > > + __aligned__ (1))); > > > > +typedef long long __v8di_u __attribute__ ((__vector_size__ (64), > > > > + __aligned__ (1))); > > > > > > > > You should define only one generic __m512i_u type, something like: > > > > > > > > typedef long long __m512i_u __attribute__ ((__vector_size__ (64), > > > > __may_alias__, __aligned__ (1))); > > > > > > > > Please see avxintrin.h for how __m256i_u is defined and used. > > > > > > > > Uros. ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-07-11 18:43 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-07-09 22:03 [PATCH] i386: Add AVX512 unaligned intrinsics Sunil Pandey 2019-07-10 9:03 ` Uros Bizjak 2019-07-10 19:20 ` Sunil Pandey 2019-07-10 19:30 ` Uros Bizjak 2019-07-11 17:04 ` Sunil Pandey 2019-07-11 18:47 ` Uros Bizjak
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).