* [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530)
@ 2018-04-26 20:15 Jakub Jelinek
2018-05-03 9:28 ` Patch ping (Re: [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530)) Jakub Jelinek
0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2018-04-26 20:15 UTC (permalink / raw)
To: Kirill Yukhin; +Cc: gcc-patches
Hi!
ICC apparently has these two intrinsics (why it doesn't have a maskz_ one
is unclear to me) which are like _mm512_{,mask_}mullo_epi64, except they are
available in AVX512F rather than just AVX512DQ and if AVX512DQ is not
enabled they expand to 3 vpmuludq instructions + 3 shifts + 2 adds; for
AVX512DQ they are the same as mullo without x.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2018-04-26 Jakub Jelinek <jakub@redhat.com>
PR target/85530
* config/i386/avx512fintrin.h (_mm512_mullox_epi64,
_mm512_mask_mullox_epi64): New intrinsics.
* gcc.target/i386/avx512f-vpmullq-1.c: New test.
* gcc.target/i386/avx512f-vpmullq-2.c: New test.
* gcc.target/i386/avx512dq-vpmullq-3.c: New test.
* gcc.target/i386/avx512dq-vpmullq-4.c: New test.
--- gcc/config/i386/avx512fintrin.h.jj 2018-02-12 19:17:40.087215130 +0100
+++ gcc/config/i386/avx512fintrin.h 2018-04-26 11:51:09.176953712 +0200
@@ -567,6 +567,20 @@ _mm512_mask_mullo_epi32 (__m512i __W, __
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullox_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) ((__v8du) __A * (__v8du) __B);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
--- gcc/testsuite/gcc.target/i386/avx512f-vpmullq-1.c.jj 2018-04-26 12:01:51.049333280 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vpmullq-1.c 2018-04-26 12:29:04.957253010 +0200
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 0 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 0 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\{\n\]*\\\$32\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 4 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\{\n\]*\\\$32\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i _x1, _y1, _z1;
+
+void extern
+avx512f_test (void)
+{
+ _x1 = _mm512_mullox_epi64 (_y1, _z1);
+ _x1 = _mm512_mask_mullox_epi64 (_x1, 3, _y1, _z1);
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vpmullq-2.c.jj 2018-04-26 12:01:54.545335345 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vpmullq-2.c 2018-04-26 12:01:09.491308704 +0200
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mno-avx512dq" } */
+/* { dg-require-effective-target avx512f } */
+/* Run the AVX512F-only expansion; avx512dq-vpmullq-4.c covers the DQ path.  */
+#define AVX512F
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *src1, long long *src2, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] * src2[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, dst1, dst2;
+ long long dst_ref[SIZE];
+ int i;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i + 50;
+ src2.a[i] = i + 100;
+ dst2.a[i] = DEFAULT_VALUE;
+ }
+
+ dst1.x = INTRINSIC (_mullox_epi64) (src1.x, src2.x);
+ dst2.x = INTRINSIC (_mask_mullox_epi64) (dst2.x, mask, src1.x, src2.x);
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst2, dst_ref))
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-3.c.jj 2018-04-26 11:58:43.604222431 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-3.c 2018-04-26 11:59:49.585261449 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i _x1, _y1, _z1;
+
+void extern
+avx512dq_test (void)
+{
+ _x1 = _mm512_mullox_epi64 (_y1, _z1);
+ _x1 = _mm512_mask_mullox_epi64 (_x1, 3, _y1, _z1);
+}
--- gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-4.c.jj 2018-04-26 11:59:59.526267331 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-4.c 2018-04-26 12:01:09.491308704 +0200
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#define AVX512DQ
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *src1, long long *src2, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] * src2[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, dst1, dst2;
+ long long dst_ref[SIZE];
+ int i;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i + 50;
+ src2.a[i] = i + 100;
+ dst2.a[i] = DEFAULT_VALUE;
+ }
+
+ dst1.x = INTRINSIC (_mullox_epi64) (src1.x, src2.x);
+ dst2.x = INTRINSIC (_mask_mullox_epi64) (dst2.x, mask, src1.x, src2.x);
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst2, dst_ref))
+ abort ();
+}
Jakub
^ permalink raw reply [flat|nested] 3+ messages in thread
* Patch ping (Re: [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530))
2018-04-26 20:15 [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530) Jakub Jelinek
@ 2018-05-03 9:28 ` Jakub Jelinek
2018-05-03 18:05 ` Kirill Yukhin
0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2018-05-03 9:28 UTC (permalink / raw)
To: Kirill Yukhin; +Cc: gcc-patches
On Thu, Apr 26, 2018 at 10:09:48PM +0200, Jakub Jelinek wrote:
> ICC apparently has these two intrinsics (why it doesn't have a maskz_ one
> is unclear to me) which are like _mm512_{,mask_}mullo_epi64, except they are
> available in AVX512F rather than just AVX512DQ and if AVX512DQ is not
> enabled they expand to 3 vpmuludq instructions + 3 shifts + 2 adds; for
> AVX512DQ they are the same as mullo without x.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-04-26 Jakub Jelinek <jakub@redhat.com>
>
> PR target/85530
> * config/i386/avx512fintrin.h (_mm512_mullox_epi64,
> _mm512_mask_mullox_epi64): New intrinsics.
>
> * gcc.target/i386/avx512f-vpmullq-1.c: New test.
> * gcc.target/i386/avx512f-vpmullq-2.c: New test.
> * gcc.target/i386/avx512dq-vpmullq-3.c: New test.
> * gcc.target/i386/avx512dq-vpmullq-4.c: New test.
I'd like to ping this patch, ok for trunk?
Jakub
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: Patch ping (Re: [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530))
2018-05-03 9:28 ` Patch ping (Re: [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530)) Jakub Jelinek
@ 2018-05-03 18:05 ` Kirill Yukhin
0 siblings, 0 replies; 3+ messages in thread
From: Kirill Yukhin @ 2018-05-03 18:05 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
Hi Jakub,
> On 3 May 2018, at 12:28, Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Apr 26, 2018 at 10:09:48PM +0200, Jakub Jelinek wrote:
>> ICC apparently has these two intrinsics (why it doesn't have a maskz_ one
>> is unclear to me) which are like _mm512_{,mask_}mullo_epi64, except they are
>> available in AVX512F rather than just AVX512DQ and if AVX512DQ is not
>> enabled they expand to 3 vpmuludq instructions + 3 shifts + 2 adds; for
>> AVX512DQ they are the same as mullo without x.
>>
>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>>
>> 2018-04-26 Jakub Jelinek <jakub@redhat.com>
>>
>> PR target/85530
>> * config/i386/avx512fintrin.h (_mm512_mullox_epi64,
>> _mm512_mask_mullox_epi64): New intrinsics.
>>
>> * gcc.target/i386/avx512f-vpmullq-1.c: New test.
>> * gcc.target/i386/avx512f-vpmullq-2.c: New test.
>> * gcc.target/i386/avx512dq-vpmullq-3.c: New test.
>> * gcc.target/i386/avx512dq-vpmullq-4.c: New test.
>
> I'd like to ping this patch, ok for trunk?
Your patch is ok for trunk.
>
> Jakub
--
Thanks, K
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-05-03 18:05 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-26 20:15 [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530) Jakub Jelinek
2018-05-03 9:28 ` Patch ping (Re: [PATCH] Add _mm512_{,mask_}mullox_epi64 intrinsics (PR target/85530)) Jakub Jelinek
2018-05-03 18:05 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).