From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: by sourceware.org (Postfix, from userid 2078)
	id 6372F3858C27; Fri, 22 Sep 2023 03:19:17 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6372F3858C27
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1695352757;
	bh=VE2a6Gch+izTR7VIHh4vcFSMSC3tuarIuaLW9n1gdp8=;
	h=From:To:Subject:Date:From;
	b=cFrUxC7IeRT5KoWKObZrGmv/dF7FBezBjkGAVrE+zRgibExULjywPQs5Fb3AGpjEC
	 yYEBF+iXUnm0sTB+8XrcQyT8A/sLIPAG/iq+bpXQK+BGWUi/AD02v7t4rHtD3zn9Qe
	 nFCIxIWOOgW8SXMBdX/xl6X1W74c/FZzGsO59iTc=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: hongtao Liu
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/ix86/heads/evex512)] Push evex512 target for 512 bit intrins
X-Act-Checkin: gcc
X-Git-Author: Haochen Jiang
X-Git-Refname: refs/vendors/ix86/heads/evex512
X-Git-Oldrev: 4d8f284b6dae2c2d8225444f4de842b1d8730021
X-Git-Newrev: 23bccd14d1b18a00a35e3f408377d42982a5c467
Message-Id: <20230922031917.6372F3858C27@sourceware.org>
Date: Fri, 22 Sep 2023 03:19:17 +0000 (GMT)
List-Id:

https://gcc.gnu.org/g:23bccd14d1b18a00a35e3f408377d42982a5c467

commit 23bccd14d1b18a00a35e3f408377d42982a5c467
Author: Haochen Jiang
Date:   Mon Aug 28 11:13:44 2023 +0800

    Push evex512 target for 512 bit intrins

    gcc/ChangeLog:

            * config/i386/avx512dqintrin.h: Add evex512 target for 512 bit
            intrins.

Diff:
---
 gcc/config/i386/avx512dqintrin.h | 1840 +++++++++++++++++++-------------------
 1 file changed, 926 insertions(+), 914 deletions(-)

diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 93900a0b5c7..b6a1d499e25 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -184,1275 +184,1426 @@ _kandn_mask8 (__mmask8 __A, __mmask8 __B)
   return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
 }
 
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f64x2 (__m128d __A)
-{
-  return (__m512d)
-    __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
-                                            _mm512_undefined_pd (),
-                                            (__mmask8) -1);
-}
-
-extern __inline __m512d
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+_kshiftli_mask8 (__mmask8 __A, unsigned int __B)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-                                                           __A,
-                                                           (__v8df)
-                                                           __O, __M);
+  return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
 }
 
-extern __inline __m512d
+extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+_kshiftri_mask8 (__mmask8 __A, unsigned int __B)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-                                                           __A,
-                                                           (__v8df)
-                                                           _mm512_setzero_ps (),
-                                                           __M);
+  return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
 }
 
-extern __inline __m512i
+extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i64x2 (__m128i __A)
+_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
 {
-  return (__m512i)
-    __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
-                                            _mm512_undefined_epi32 (),
+  return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
+                                                 (__v2df) __B, __C,
+                                                 (__v2df) _mm_setzero_pd (),
                                                  (__mmask8) -1);
 }
 
-extern __inline __m512i
+extern __inline __m128d
 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) +_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) - __A, - (__v8di) - __O, __M); + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, __R); } -extern __inline __m512i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) +_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __C) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) - __A, - (__v8di) - _mm512_setzero_si512 (), - __M); + return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + (__mmask8) __U); } -extern __inline __m512 +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_broadcast_f32x2 (__m128 __A) +_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __C, const int __R) { - return (__m512) - __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_undefined_ps (), - (__mmask16) -1); + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + __U, __R); } -extern __inline __m512 +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) +_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf) - __O, __M); + return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); } -extern __inline __m512 +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) +_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + int __C, const int __R) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf) - _mm512_setzero_ps (), - __M); + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + __U, __R); } -extern __inline __m512i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_broadcast_i32x2 (__m128i __A) +_mm_reduce_ss (__m128 __A, __m128 __B, int __C) { - return (__m512i) - __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si) - _mm512_undefined_epi32 (), - (__mmask16) -1); + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) _mm_setzero_ps (), + (__mmask8) -1); } -extern __inline __m512i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) +_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) - __A, - (__v16si) - __O, __M); + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, __R); } -extern __inline __m512i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) +_mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __C) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) - __A, - (__v16si) - _mm512_setzero_si512 (), - __M); + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_broadcast_f32x8 (__m256 __A) +_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __C, const int __R) { - return (__m512) - __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - _mm512_undefined_ps (), - (__mmask16) -1); + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + __U, __R); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) +_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - (__v16sf)__O, - __M); + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) +_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + int __C, const int __R) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - (__v16sf) - _mm512_setzero_ps (), - __M); + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + __U, __R); } -extern __inline __m512i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_broadcast_i32x8 (__m256i __A) +_mm_range_sd (__m128d __A, __m128d __B, int __C) { - return (__m512i) - __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, - (__v16si) - _mm512_undefined_epi32 (), - (__mmask16) -1); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) +_mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) - __A, - (__v16si)__O, - __M); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) +_mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) - __A, - (__v16si) - _mm512_setzero_si512 (), - __M); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512i +extern __inline __m128 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mullo_epi64 (__m512i __A, __m512i __B) +_mm_range_ss (__m128 __A, __m128 __B, int __C) { - return (__m512i) ((__v8du) __A * (__v8du) __B); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, - __m512i __B) +_mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C) { - return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +_mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) { - return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_xor_pd (__m512d __A, __m512d __B) +_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R) { - return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, __R); } -extern __inline __m512d +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B) +_mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + int __C, const int __R) { - return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + (__mmask8) __U, __R); } -extern __inline __m512d +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) +_mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C, + const int __R) { - return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_xor_ps (__m512 __A, __m512 __B) +_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R) { - return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, 
__C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, __R); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +_mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + int __C, const int __R) { - return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + (__mmask8) __U, __R); } -extern __inline __m512 +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) +_mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C, + const int __R) { - return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); + return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); } -extern __inline __m512d +extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_or_pd (__m512d __A, __m512d __B) +_mm_fpclass_ss_mask (__m128 __A, const int __imm) { - return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, + (__mmask8) -1); } -extern __inline __m512d +extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +_mm_fpclass_sd_mask (__m128d __A, const int __imm) { - return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, + (__mmask8) -1); } -extern __inline __m512d +extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) { - return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); } -extern __inline __m512 +extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_or_ps (__m512 __A, __m512 __B) +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) { - return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); } -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) -{ - return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); -} +#else +#define _kshiftli_mask8(X, Y) \ + ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y))) -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ - return (__m512) __builtin_ia32_orps512_mask 
((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); -} +#define _kshiftri_mask8(X, Y) \ + ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y))) + +#define _mm_range_sd(A, B, C) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_range_sd(W, U, A, B, C) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_range_sd(U, A, B, C) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_range_ss(A, B, C) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_range_ss(W, U, A, B, C) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_range_ss(U, A, B, C) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_range_round_sd(A, B, C, R) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8) -1, (R))) + +#define _mm_mask_range_round_sd(W, U, A, B, C, R) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ + (__mmask8)(U), (R))) + +#define _mm_maskz_range_round_sd(U, A, B, C, R) \ + ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U), (R))) + +#define _mm_range_round_ss(A, B, C, R) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8) -1, (R))) + +#define _mm_mask_range_round_ss(W, U, A, B, C, R) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ + (__mmask8)(U), (R))) + +#define _mm_maskz_range_round_ss(U, A, B, C, R) \ + ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U), (R))) + +#define _mm_fpclass_ss_mask(X, C) \ + ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ + (int) (C), (__mmask8) (-1))) \ + +#define _mm_fpclass_sd_mask(X, C) \ + ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ + (int) (C), (__mmask8) (-1))) \ + +#define _mm_mask_fpclass_ss_mask(X, C, U) \ + ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ + (int) (C), (__mmask8) (U))) + +#define _mm_mask_fpclass_sd_mask(X, C, U) \ + ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ + (int) (C), (__mmask8) (U))) +#define _mm_reduce_sd(A, B, C) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)-1)) + +#define 
_mm_mask_reduce_sd(W, U, A, B, C) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) + +#define _mm_maskz_reduce_sd(U, A, B, C) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U))) + +#define _mm_reduce_round_sd(A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) + +#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U), (int)(R))) + +#define _mm_reduce_ss(A, B, C) \ + ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)-1)) + +#define _mm_mask_reduce_ss(W, U, A, B, C) \ + ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U))) + +#define _mm_maskz_reduce_ss(U, A, B, C) \ + ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U))) + +#define _mm_reduce_round_ss(A, B, C, R) \ + ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) + +#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ + ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U), (int)(R))) + +#endif + +#ifdef __DISABLE_AVX512DQ__ +#undef __DISABLE_AVX512DQ__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512DQ__ */ + +#if !defined (__AVX512DQ__) || !defined (__EVEX512__) +#pragma GCC push_options +#pragma GCC target("avx512dq,evex512") +#define __DISABLE_AVX512DQ_512__ +#endif /* __AVX512DQ_512__ */ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_and_pd (__m512d __A, __m512d __B) +_mm512_broadcast_f64x2 (__m128d __A) { - return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), + return (__m512d) + __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, + _mm512_undefined_pd (), (__mmask8) -1); } extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B) +_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A) { - return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); + return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) + __A, + (__v8df) + __O, __M); } extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) +_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { - return (__m512d) 
__builtin_ia32_andpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) + __A, + (__v8df) + _mm512_setzero_ps (), + __M); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_and_ps (__m512 __A, __m512 __B) +_mm512_broadcast_i64x2 (__m128i __A) { - return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return (__m512i) + __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, + _mm512_undefined_epi32 (), + (__mmask8) -1); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) { - return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); + return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) + __A, + (__v8di) + __O, __M); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) +_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { - return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); + return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) + __A, + (__v8di) + _mm512_setzero_si512 (), + __M); } -extern __inline __m512d +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_andnot_pd (__m512d __A, __m512d __B) +_mm512_broadcast_f32x2 (__m128 __A) { - return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return (__m512) + __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, + (__v16sf)_mm512_undefined_ps (), + (__mmask16) -1); } -extern __inline __m512d +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B) +_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) { - return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); + return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, + (__v16sf) + __O, __M); } -extern __inline __m512d +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) +_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) { - return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, + (__v16sf) + _mm512_setzero_ps (), + __M); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_andnot_ps (__m512 __A, __m512 __B) +_mm512_broadcast_i32x2 (__m128i __A) { - return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), + return (__m512i) + __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, + (__v16si) + _mm512_undefined_epi32 (), (__mmask16) -1); 
} -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B) +_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) { - return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); + return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) + __A, + (__v16si) + __O, __M); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) +_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) { - return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); + return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) + __A, + (__v16si) + _mm512_setzero_si512 (), + __M); } -extern __inline __mmask16 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_movepi32_mask (__m512i __A) +_mm512_broadcast_f32x8 (__m256 __A) { - return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); + return (__m512) + __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, + _mm512_undefined_ps (), + (__mmask16) -1); } -extern __inline __mmask8 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_movepi64_mask (__m512i __A) +_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) { - return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); + return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, + (__v16sf)__O, + __M); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_movm_epi32 (__mmask16 __A) +_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) { - return (__m512i) __builtin_ia32_cvtmask2d512 (__A); + return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, + (__v16sf) + _mm512_setzero_ps (), + __M); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_movm_epi64 (__mmask8 __A) +_mm512_broadcast_i32x8 (__m256i __A) { - return (__m512i) __builtin_ia32_cvtmask2q512 (__A); + return (__m512i) + __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, + (__v16si) + _mm512_undefined_epi32 (), + (__mmask16) -1); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvttpd_epi64 (__m512d __A) +_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) { - return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) + __A, + (__v16si)__O, + __M); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) { - return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) + __A, + (__v16si) + _mm512_setzero_si512 (), + __M); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvttpd_epi64 
(__mmask8 __U, __m512d __A) +_mm512_mullo_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) ((__v8du) __A * (__v8du) __B); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvttpd_epu64 (__m512d __A) +_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, + __m512i __B) { - return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) +_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) +_mm512_xor_pd (__m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvttps_epi64 (__m256 __A) +_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B) { - return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) +_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) +_mm512_xor_ps (__m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvttps_epu64 (__m256 __A) +_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvttps2uqq512_mask 
((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) +_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) +_mm512_or_pd (__m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtpd_epi64 (__m512d __A) +_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) +_mm512_or_ps (__m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtpd_epu64 (__m512d __A) +_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) +_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, - (__v8di) 
__W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) +_mm512_and_pd (__m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtps_epi64 (__m256 __A) +_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B) { - return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) +_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) +_mm512_and_ps (__m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtps_epu64 (__m256 __A) +_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) +_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); } -extern __inline __m512i +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) +_mm512_andnot_pd (__m512d __A, __m512d __B) { - return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - 
_MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); } -extern __inline __m256 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtepi64_ps (__m512i __A) +_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B) { - return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) +_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, - (__v8sf) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) +_mm512_andnot_ps (__m512 __A, __m512 __B) { - return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); } -extern __inline __m256 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtepu64_ps (__m512i __A) +_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B) { - return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); } -extern __inline __m256 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) +_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, - (__v8sf) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); } -extern __inline __m256 +extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) +_mm512_movepi32_mask (__m512i __A) { - return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } -extern __inline __m512d +extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtepi64_pd (__m512i __A) +_mm512_movepi64_mask (__m512i __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); } -extern __inline __m512d +extern __inline 
__m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) +_mm512_movm_epi32 (__mmask16 __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvtmask2d512 (__A); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) +_mm512_movm_epi64 (__mmask8 __A) { - return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtepu64_pd (__m512i __A) +_mm512_cvttpd_epi64 (__m512d __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) - _mm512_setzero_pd (), + return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) +_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) __W, + return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, + (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) +_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, - (__v8df) - _mm512_setzero_pd (), + return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -#ifdef __OPTIMIZE__ -extern __inline __mmask8 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftli_mask8 (__mmask8 __A, unsigned int __B) +_mm512_cvttpd_epu64 (__m512d __A) { - return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B); + return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftri_mask8 (__mmask8 __A, unsigned int __B) +_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { - return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B); + return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_range_pd (__m512d __A, __m512d __B, int __C) +_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, - (__v8df) __B, __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, + (__v8di) + 
_mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_range_pd (__m512d __W, __mmask8 __U, - __m512d __A, __m512d __B, int __C) +_mm512_cvttps_epi64 (__m256 __A) { - return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, - (__v8df) __B, __C, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C) +_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { - return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, - (__v8df) __B, __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_range_ps (__m512 __A, __m512 __B, int __C) +_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { - return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, - (__v16sf) __B, __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_range_ps (__m512 __W, __mmask16 __U, - __m512 __A, __m512 __B, int __C) +_mm512_cvttps_epu64 (__m256 __A) { - return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, - (__v16sf) __B, __C, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m512 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C) +_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { - return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, - (__v16sf) __B, __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_reduce_sd (__m128d __A, __m128d __B, int __C) +_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { - return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) _mm_setzero_pd (), - (__mmask8) -1); + return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R) +_mm512_cvtpd_epi64 
(__m512d __A) { - return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), - (__mmask8) -1, __R); + return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, - __m128d __B, int __C) +_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { - return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A, - __m128d __B, int __C, const int __R) +_mm512_cvtpd_epu64 (__m512d __A) { - return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) __W, - __U, __R); + return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) +_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { - return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) _mm_setzero_pd (), - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B, - int __C, const int __R) +_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { - return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), - __U, __R); + return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_reduce_ss (__m128 __A, __m128 __B, int __C) +_mm512_cvtps_epi64 (__m256 __A) { - return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) _mm_setzero_ps (), - (__mmask8) -1); + return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R) +_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { - return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) 
__A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1, __R); + return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, - __m128 __B, int __C) +_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { - return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A, - __m128 __B, int __C, const int __R) +_mm512_cvtps_epu64 (__m256 __A) { - return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) __W, - __U, __R); + return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) +_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { - return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) _mm_setzero_ps (), - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B, - int __C, const int __R) +_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { - return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - __U, __R); + return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_range_sd (__m128d __A, __m128d __B, int __C) +_mm512_cvtepi64_ps (__m512i __A) { - return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), + return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, + (__v8sf) + _mm256_setzero_ps (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C) +_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { - return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) __W, + return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, + (__v8sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) +_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { - return 
(__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), + return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, + (__v8sf) + _mm256_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_range_ss (__m128 __A, __m128 __B, int __C) +_mm512_cvtepu64_ps (__m512i __A) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C) +_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, + (__v8sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) +{ + return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} -extern __inline __m128 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) +_mm512_cvtepi64_pd (__m512i __A) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R) +_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { - return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), - (__mmask8) -1, __R); + return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128d +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, - int __C, const int __R) +_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) +{ + return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu64_pd (__m512i __A) { - return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) __W, - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern 
__inline __m128d +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C, - const int __R) +_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { - return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, - (__v2df) __B, __C, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R) +_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1, __R); + return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +#ifdef __OPTIMIZE__ +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, - int __C, const int __R) +_mm512_range_pd (__m512d __A, __m512d __B, int __C) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) __W, - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, + (__v8df) __B, __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __m128 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C, - const int __R) +_mm512_mask_range_pd (__m512d __W, __mmask8 __U, + __m512d __A, __m512d __B, int __C) { - return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, - (__v4sf) __B, __C, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, + (__v8df) __B, __C, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 +extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fpclass_ss_mask (__m128 __A, const int __imm) +_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C) { - return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, - (__mmask8) -1); + return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, + (__v8df) __B, __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fpclass_sd_mask (__m128d __A, const int __imm) +_mm512_range_ps (__m512 __A, __m512 __B, int __C) { - return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, - (__mmask8) -1); + return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, + (__v16sf) __B, __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) +_mm512_mask_range_ps (__m512 __W, __mmask16 __U, + __m512 __A, __m512 __B, int __C) { - return 
(__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); + return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, + (__v16sf) __B, __C, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 +extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) +_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C) { - return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); + return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, + (__v16sf) __B, __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512i @@ -2395,72 +2546,6 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) } #else -#define _kshiftli_mask8(X, Y) \ - ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y))) - -#define _kshiftri_mask8(X, Y) \ - ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y))) - -#define _mm_range_sd(A, B, C) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) - -#define _mm_mask_range_sd(W, U, A, B, C) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ - (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) - -#define _mm_maskz_range_sd(U, A, B, C) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) - -#define _mm_range_ss(A, B, C) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) - -#define _mm_mask_range_ss(W, U, A, B, C) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ - (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) - -#define _mm_maskz_range_ss(U, A, B, C) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) - -#define _mm_range_round_sd(A, B, C, R) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8) -1, (R))) - -#define _mm_mask_range_round_sd(W, U, A, B, C, R) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ - (__mmask8)(U), (R))) - -#define _mm_maskz_range_round_sd(U, A, B, C, R) \ - ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8)(U), (R))) - -#define _mm_range_round_ss(A, B, C, R) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8) -1, (R))) - -#define _mm_mask_range_round_ss(W, U, A, B, C, R) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ - (__mmask8)(U), (R))) - -#define _mm_maskz_range_round_ss(U, A, B, C, R) \ - ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ - 
(__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8)(U), (R))) - #define _mm512_cvtt_roundpd_epi64(A, B) \ ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \ _mm512_setzero_si512 (), \ @@ -2792,22 +2877,6 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__v16si)(__m512i)_mm512_setzero_si512 (),\ (__mmask16)(U))) -#define _mm_fpclass_ss_mask(X, C) \ - ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ - (int) (C), (__mmask8) (-1))) \ - -#define _mm_fpclass_sd_mask(X, C) \ - ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ - (int) (C), (__mmask8) (-1))) \ - -#define _mm_mask_fpclass_ss_mask(X, C, U) \ - ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ - (int) (C), (__mmask8) (U))) - -#define _mm_mask_fpclass_sd_mask(X, C, U) \ - ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ - (int) (C), (__mmask8) (U))) - #define _mm512_mask_fpclass_pd_mask(u, X, C) \ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ (int) (C), (__mmask8)(u))) @@ -2824,68 +2893,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\ (int) (c),(__mmask16)-1)) -#define _mm_reduce_sd(A, B, C) \ - ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8)-1)) - -#define _mm_mask_reduce_sd(W, U, A, B, C) \ - ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) - -#define _mm_maskz_reduce_sd(U, A, B, C) \ - ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8)(U))) - -#define _mm_reduce_round_sd(A, B, C, R) \ - ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) - -#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ - ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ - ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ - (__mmask8)(U), (int)(R))) - -#define _mm_reduce_ss(A, B, C) \ - ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8)-1)) - -#define _mm_mask_reduce_ss(W, U, A, B, C) \ - ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U))) - -#define _mm_maskz_reduce_ss(U, A, B, C) \ - ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - (__mmask8)(U))) - -#define _mm_reduce_round_ss(A, B, C, R) \ - ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) - -#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ - ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ - ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ - 
(__mmask8)(U), (int)(R))) - - #endif -#ifdef __DISABLE_AVX512DQ__ -#undef __DISABLE_AVX512DQ__ +#ifdef __DISABLE_AVX512DQ_512__ +#undef __DISABLE_AVX512DQ_512__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512DQ__ */ +#endif /* __DISABLE_AVX512DQ_512__ */ #endif /* _AVX512DQINTRIN_H_INCLUDED */
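
[Editor's note, not part of the commit] For readers following this evex512 series, a minimal usage sketch of what the change means in practice is given below. Everything in it is an assumption drawn from this branch rather than from the patch itself: the file name, the "evex512" target-attribute string and any -mevex512 command-line spelling are taken to be whatever this vendor branch ultimately exposes, and the sketch merely illustrates that the 512-bit _mm512_* AVX512DQ intrinsics in this header are now guarded by the evex512 target while the scalar _mm_* forms moved out of that guarded region stay available without it.

/* evex512_demo.c -- hypothetical usage sketch, not part of the patch.
   Assumed build (names from this branch, not confirmed upstream):
       gcc -O2 -mavx512dq -mevex512 evex512_demo.c  */

#include <immintrin.h>
#include <stdio.h>

/* The "evex512" attribute string is an assumption based on this branch;
   avx512dq alone sufficed before this change.  */
__attribute__ ((target ("avx512dq,evex512")))
static void
convert_demo (void)
{
  /* Eight signed 64-bit integers in a 512-bit vector ...  */
  __m512i q = _mm512_set_epi64 (8, 7, 6, 5, 4, 3, 2, 1);

  /* ... converted to eight doubles with _mm512_cvtepi64_pd, one of the
     512-bit intrinsics this header now places behind evex512.  */
  __m512d d = _mm512_cvtepi64_pd (q);

  double out[8];
  _mm512_storeu_pd (out, d);

  for (int i = 0; i < 8; i++)
    printf ("%.1f ", out[i]);
  printf ("\n");
}

int
main (void)
{
  /* Runtime dispatch keeps the sketch safe on CPUs without AVX512DQ.  */
  if (__builtin_cpu_supports ("avx512dq"))
    convert_demo ();
  else
    printf ("AVX512DQ not available on this CPU\n");
  return 0;
}

On an AVX512DQ-capable machine the sketch prints 1.0 through 8.0; elsewhere it falls back to the message in main. The scalar reduce/range/fpclass intrinsics removed from the guarded region in the hunks above would, under the same assumption, keep compiling without the evex512 attribute.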