* [x86, 5/n] Replace builtins with vector extensions
@ 2014-11-09 15:57 Marc Glisse
2014-11-10 20:45 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Marc Glisse @ 2014-11-09 15:57 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak
[-- Attachment #1: Type: TEXT/PLAIN, Size: 456 bytes --]
Hello,
This patch covers the &, | and ^ operations for the 256-bit and 512-bit vector sizes. Regtested together with 6/n.
2014-11-10 Marc Glisse <marc.glisse@inria.fr>
* config/i386/avx2intrin.h (_mm256_and_si256, _mm256_or_si256,
_mm256_xor_si256): Use vector extensions instead of builtins.
* config/i386/avx512fintrin.h (_mm512_or_si512, _mm512_or_epi32,
_mm512_or_epi64, _mm512_xor_si512, _mm512_xor_epi32, _mm512_xor_epi64,
_mm512_and_si512, _mm512_and_epi32, _mm512_and_epi64): Likewise.
--
Marc Glisse
[-- Attachment #2: Type: TEXT/PLAIN, Size: 9075 bytes --]
Index: avx2intrin.h
===================================================================
--- avx2intrin.h (revision 217262)
+++ avx2intrin.h (working copy)
@@ -171,21 +171,21 @@ _mm256_alignr_epi8 (__m256i __A, __m256i
#define _mm256_alignr_epi8(A, B, N) \
((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
(__v4di)(__m256i)(B), \
(int)(N) * 8))
#endif
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_si256 (__m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B);
+ return (__m256i) ((__v4du)__A & (__v4du)__B);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_si256 (__m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
}
extern __inline __m256i
@@ -569,21 +569,21 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epu32 (__m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_si256 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B);
+ return (__m256i) ((__v4du)__A | (__v4du)__B);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sad_epu8 (__m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
}
extern __inline __m256i
@@ -890,21 +890,21 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_si256 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B);
+ return (__m256i) ((__v4du)__A ^ (__v4du)__B);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_load_si256 (__m256i const *__X)
{
return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
}
extern __inline __m128
Index: avx512fintrin.h
===================================================================
--- avx512fintrin.h (revision 217262)
+++ avx512fintrin.h (working copy)
@@ -6774,36 +6774,28 @@ _mm512_maskz_moveldup_ps (__mmask16 __U,
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_or_si512 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A | (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_or_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A | (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
@@ -6817,25 +6809,21 @@ _mm512_maskz_or_epi32 (__mmask16 __U, __
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_or_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
+ return (__m512i) ((__v8du) __A | (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
@@ -6849,36 +6837,28 @@ _mm512_maskz_or_epi64 (__mmask8 __U, __m
(__v8di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_xor_si512 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A ^ (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_xor_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A ^ (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
@@ -6892,25 +6872,21 @@ _mm512_maskz_xor_epi32 (__mmask16 __U, _
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_xor_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
+ return (__m512i) ((__v8du) __A ^ (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
@@ -7105,36 +7081,28 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __
((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
(int)(B), \
(__v8di)_mm512_setzero_si512 (), \
(__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_si512 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A & (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
+ return (__m512i) ((__v16su) __A & (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
@@ -7148,25 +7116,21 @@ _mm512_maskz_and_epi32 (__mmask16 __U, _
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
+ return (__m512i) ((__v8du) __A & (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W, __U);
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [x86, 5/n] Replace builtins with vector extensions
2014-11-09 15:57 [x86, 5/n] Replace builtins with vector extensions Marc Glisse
@ 2014-11-10 20:45 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2014-11-10 20:45 UTC (permalink / raw)
To: Marc Glisse; +Cc: gcc-patches
On Sun, Nov 9, 2014 at 4:57 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> &|^ of size 256 and 512. Regtested with 6/n.
>
> 2014-11-10 Marc Glisse <marc.glisse@inria.fr>
>
> * config/i386/avx2intrin.h (_mm256_and_si256, _mm256_or_si256,
> _mm256_xor_si256): Use vector extensions instead of builtins.
> * config/i386/avx512fintrin.h (_mm512_or_si512, _mm512_or_epi32,
> _mm512_or_epi64, _mm512_xor_si512, _mm512_xor_epi32,
> _mm512_xor_epi64,
> _mm512_and_si512, _mm512_and_epi32, _mm512_and_epi64): Likewise.
OK.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-11-10 20:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-09 15:57 [x86, 5/n] Replace builtins with vector extensions Marc Glisse
2014-11-10 20:45 ` Uros Bizjak
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).