[gcc r14-103] Add reduce_*_ep[i|u][8|16] series intrinsics
From: Haochen Jiang @ 2023-04-20  1:33 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:ca3bd377c7eae2ece01a1bb686a920daad179a89

commit r14-103-gca3bd377c7eae2ece01a1bb686a920daad179a89
Author: Hu, Lin1 <lin1.hu@intel.com>
Date:   Thu Feb 16 09:10:16 2023 +0800

    Add reduce_*_ep[i|u][8|16] series intrinsics
    
    gcc/ChangeLog:
    
            * config/i386/avx2intrin.h
            (_MM_REDUCE_OPERATOR_BASIC_EPI16): New macro.
            (_MM_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto.
            (_MM256_REDUCE_OPERATOR_BASIC_EPI16): Ditto.
            (_MM256_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto.
            (_MM_REDUCE_OPERATOR_BASIC_EPI8): Ditto.
            (_MM_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto.
            (_MM256_REDUCE_OPERATOR_BASIC_EPI8): Ditto.
            (_MM256_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto.
            (_mm_reduce_add_epi16): New intrinsics.
            (_mm_reduce_mul_epi16): Ditto.
            (_mm_reduce_and_epi16): Ditto.
            (_mm_reduce_or_epi16): Ditto.
            (_mm_reduce_max_epi16): Ditto.
            (_mm_reduce_max_epu16): Ditto.
            (_mm_reduce_min_epi16): Ditto.
            (_mm_reduce_min_epu16): Ditto.
            (_mm256_reduce_add_epi16): Ditto.
            (_mm256_reduce_mul_epi16): Ditto.
            (_mm256_reduce_and_epi16): Ditto.
            (_mm256_reduce_or_epi16): Ditto.
            (_mm256_reduce_max_epi16): Ditto.
            (_mm256_reduce_max_epu16): Ditto.
            (_mm256_reduce_min_epi16): Ditto.
            (_mm256_reduce_min_epu16): Ditto.
            (_mm_reduce_add_epi8): Ditto.
            (_mm_reduce_mul_epi8): Ditto.
            (_mm_reduce_and_epi8): Ditto.
            (_mm_reduce_or_epi8): Ditto.
            (_mm_reduce_max_epi8): Ditto.
            (_mm_reduce_max_epu8): Ditto.
            (_mm_reduce_min_epi8): Ditto.
            (_mm_reduce_min_epu8): Ditto.
            (_mm256_reduce_add_epi8): Ditto.
            (_mm256_reduce_mul_epi8): Ditto.
            (_mm256_reduce_and_epi8): Ditto.
            (_mm256_reduce_or_epi8): Ditto.
            (_mm256_reduce_max_epi8): Ditto.
            (_mm256_reduce_max_epu8): Ditto.
            (_mm256_reduce_min_epi8): Ditto.
            (_mm256_reduce_min_epu8): Ditto.
            * config/i386/avx512vlbwintrin.h
            (_mm_mask_reduce_add_epi16): Ditto.
            (_mm_mask_reduce_mul_epi16): Ditto.
            (_mm_mask_reduce_and_epi16): Ditto.
            (_mm_mask_reduce_or_epi16): Ditto.
            (_mm_mask_reduce_max_epi16): Ditto.
            (_mm_mask_reduce_max_epu16): Ditto.
            (_mm_mask_reduce_min_epi16): Ditto.
            (_mm_mask_reduce_min_epu16): Ditto.
            (_mm256_mask_reduce_add_epi16): Ditto.
            (_mm256_mask_reduce_mul_epi16): Ditto.
            (_mm256_mask_reduce_and_epi16): Ditto.
            (_mm256_mask_reduce_or_epi16): Ditto.
            (_mm256_mask_reduce_max_epi16): Ditto.
            (_mm256_mask_reduce_max_epu16): Ditto.
            (_mm256_mask_reduce_min_epi16): Ditto.
            (_mm256_mask_reduce_min_epu16): Ditto.
            (_mm_mask_reduce_add_epi8): Ditto.
            (_mm_mask_reduce_mul_epi8): Ditto.
            (_mm_mask_reduce_and_epi8): Ditto.
            (_mm_mask_reduce_or_epi8): Ditto.
            (_mm_mask_reduce_max_epi8): Ditto.
            (_mm_mask_reduce_max_epu8): Ditto.
            (_mm_mask_reduce_min_epi8): Ditto.
            (_mm_mask_reduce_min_epu8): Ditto.
            (_mm256_mask_reduce_add_epi8): Ditto.
            (_mm256_mask_reduce_mul_epi8): Ditto.
            (_mm256_mask_reduce_and_epi8): Ditto.
            (_mm256_mask_reduce_or_epi8): Ditto.
            (_mm256_mask_reduce_max_epi8): Ditto.
            (_mm256_mask_reduce_max_epu8): Ditto.
            (_mm256_mask_reduce_min_epi8): Ditto.
            (_mm256_mask_reduce_min_epu8): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx512vlbw-reduce-op-1.c: New test.

Diff:
---
 gcc/config/i386/avx2intrin.h                       | 347 +++++++++++++++++++++
 gcc/config/i386/avx512vlbwintrin.h                 | 256 +++++++++++++++
 .../gcc.target/i386/avx512vlbw-reduce-op-1.c       | 206 ++++++++++++
 3 files changed, 809 insertions(+)
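
A minimal usage sketch of the new unmasked reductions (values are
illustrative; building it assumes a GCC that contains this commit,
compiled with at least -mavx2):

  #include <immintrin.h>

  int
  test_usage (void)
  {
    /* Eight 16-bit lanes folded into one scalar each way.  */
    __m128i v = _mm_setr_epi16 (1, 2, 3, 4, 5, 6, 7, 8);
    short sum = _mm_reduce_add_epi16 (v);  /* 1+2+...+8 = 36 */
    short max = _mm_reduce_max_epi16 (v);  /* 8 */
    return sum + max;
  }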

diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 1b9c8169a96..9b8c13b7233 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -1915,6 +1915,353 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
 					   (int) (SCALE))
 #endif  /* __OPTIMIZE__ */
 
+#define _MM_REDUCE_OPERATOR_BASIC_EPI16(op) \
+  __v8hi __T1 = (__v8hi)__W; \
+  __v8hi __T2 = __builtin_shufflevector (__T1, __T1, 4, 5, 6, 7, 4, 5, 6, 7); \
+  __v8hi __T3 = __T1 op __T2; \
+  __v8hi __T4 = __builtin_shufflevector (__T3, __T3, 2, 3, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T5 = __T3 op __T4; \
+  __v8hi __T6 = __builtin_shufflevector (__T5, __T5, 1, 1, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T7 = __T5 op __T6; \
+  return __T7[0]
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_add_epi16 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (+);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_mul_epi16 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (*);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_and_epi16 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (&);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_or_epi16 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (|);
+}
+
+#define _MM_REDUCE_OPERATOR_MAX_MIN_EP16(op) \
+  __m128i __T1 = (__m128i)__builtin_shufflevector ((__v8hi)__V, \
+		  (__v8hi)__V, 4, 5, 6, 7, 4, 5, 6, 7); \
+  __m128i __T2 = _mm_##op (__V, __T1); \
+  __m128i __T3 = (__m128i)__builtin_shufflevector ((__v8hi)__T2, \
+		  (__v8hi)__T2, 2, 3, 2, 3, 4, 5, 6, 7); \
+  __m128i __T4 = _mm_##op (__T2, __T3); \
+  __m128i __T5 = (__m128i)__builtin_shufflevector ((__v8hi)__T4, \
+		  (__v8hi)__T4, 1, 1, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T6 = (__v8hi)_mm_##op (__T4, __T5); \
+  return __T6[0]
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_max_epi16 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_max_epu16 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_min_epi16 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_min_epu16 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
+}
+
+#define _MM256_REDUCE_OPERATOR_BASIC_EPI16(op) \
+  __v8hi __T1 = (__v8hi)_mm256_extracti128_si256 (__W, 0); \
+  __v8hi __T2 = (__v8hi)_mm256_extracti128_si256 (__W, 1); \
+  __v8hi __T3 = __T1 op __T2; \
+  __v8hi __T4 = __builtin_shufflevector (__T3, __T3, 4, 5, 6, 7, 4, 5, 6, 7); \
+  __v8hi __T5 = __T3 op __T4; \
+  __v8hi __T6 = __builtin_shufflevector (__T5, __T5, 2, 3, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T7 = __T5 op __T6; \
+  __v8hi __T8 = __builtin_shufflevector (__T7, __T7, 1, 1, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T9 = __T7 op __T8; \
+  return __T9[0]
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_add_epi16 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (+);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_mul_epi16 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (*);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_and_epi16 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (&);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_or_epi16 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (|);
+}
+
+#define _MM256_REDUCE_OPERATOR_MAX_MIN_EP16(op) \
+  __m128i __T1 = _mm256_extracti128_si256 (__V, 0); \
+  __m128i __T2 = _mm256_extracti128_si256 (__V, 1); \
+  __m128i __T3 = _mm_##op (__T1, __T2); \
+  __m128i __T4 = (__m128i)__builtin_shufflevector ((__v8hi)__T3, \
+		  (__v8hi)__T3, 4, 5, 6, 7, 4, 5, 6, 7); \
+  __m128i __T5 = _mm_##op (__T3, __T4); \
+  __m128i __T6 = (__m128i)__builtin_shufflevector ((__v8hi)__T5, \
+		  (__v8hi)__T5, 2, 3, 2, 3, 4, 5, 6, 7); \
+  __m128i __T7 = _mm_##op (__T5, __T6); \
+  __m128i __T8 = (__m128i)__builtin_shufflevector ((__v8hi)__T7, \
+		  (__v8hi)__T7, 1, 1, 2, 3, 4, 5, 6, 7); \
+  __v8hi __T9 = (__v8hi)_mm_##op (__T7, __T8); \
+  return __T9[0]
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_max_epi16 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_max_epu16 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_min_epi16 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_min_epu16 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
+}
+
+#define _MM_REDUCE_OPERATOR_BASIC_EPI8(op) \
+  __v16qi __T1 = (__v16qi)__W; \
+  __v16qi __T2 = __builtin_shufflevector (__T1, __T1, \
+		  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T3 = __T1 op __T2; \
+  __v16qi __T4 = __builtin_shufflevector (__T3, __T3, \
+		  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T5 = __T3 op __T4; \
+  __v16qi __T6 = __builtin_shufflevector (__T5, __T5, \
+		  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T7 = __T5 op __T6; \
+  __v16qi __T8 = __builtin_shufflevector (__T7, __T7, \
+		  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T9 = __T7 op __T8; \
+  return __T9[0]
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_add_epi8 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (+);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_mul_epi8 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (*);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_and_epi8 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (&);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_or_epi8 (__m128i __W)
+{
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (|);
+}
+
+#define _MM_REDUCE_OPERATOR_MAX_MIN_EP8(op) \
+  __m128i __T1 = (__m128i)__builtin_shufflevector ((__v16qi)__V, (__v16qi)__V, \
+		  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T2 = _mm_##op (__V, __T1); \
+  __m128i __T3 = (__m128i)__builtin_shufflevector ((__v16qi)__T2, \
+		  (__v16qi)__T2, \
+		  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T4 = _mm_##op (__T2, __T3); \
+  __m128i __T5 = (__m128i)__builtin_shufflevector ((__v16qi)__T4, \
+		  (__v16qi)__T4, \
+		  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T6 = _mm_##op (__T4, __T5); \
+  __m128i __T7 = (__m128i)__builtin_shufflevector ((__v16qi)__T6, \
+		  (__v16qi)__T6, \
+		  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T8 = (__v16qi)_mm_##op (__T6, __T7); \
+  return __T8[0]
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_max_epi8 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_max_epu8 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_min_epi8 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_min_epu8 (__m128i __V)
+{
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
+}
+
+#define _MM256_REDUCE_OPERATOR_BASIC_EPI8(op) \
+  __v16qi __T1 = (__v16qi)_mm256_extracti128_si256 (__W, 0); \
+  __v16qi __T2 = (__v16qi)_mm256_extracti128_si256 (__W, 1); \
+  __v16qi __T3 = __T1 op __T2; \
+  __v16qi __T4 = __builtin_shufflevector (__T3, __T3, \
+		  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T5 = __T3 op __T4; \
+  __v16qi __T6 = __builtin_shufflevector (__T5, __T5, \
+		  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T7 = __T5 op __T6; \
+  __v16qi __T8 = __builtin_shufflevector (__T7, __T7, \
+		  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T9 = __T7 op __T8; \
+  __v16qi __T10 = __builtin_shufflevector (__T9, __T9, \
+		  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T11 = __T9 op __T10; \
+  return __T11[0]
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_add_epi8 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (+);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_mul_epi8 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (*);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_and_epi8 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (&);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_or_epi8 (__m256i __W)
+{
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (|);
+}
+
+#define _MM256_REDUCE_OPERATOR_MAX_MIN_EP8(op) \
+  __m128i __T1 = _mm256_extracti128_si256 (__V, 0); \
+  __m128i __T2 = _mm256_extracti128_si256 (__V, 1); \
+  __m128i __T3 = _mm_##op (__T1, __T2); \
+  __m128i __T4 = (__m128i)__builtin_shufflevector ((__v16qi)__T3, \
+		  (__v16qi)__T3, \
+		  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T5 = _mm_##op (__T3, __T4); \
+  __m128i __T6 = (__m128i)__builtin_shufflevector ((__v16qi)__T5, \
+		  (__v16qi)__T5, \
+		  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T7 = _mm_##op (__T5, __T6); \
+  __m128i __T8 = (__m128i)__builtin_shufflevector ((__v16qi)__T7, \
+		  (__v16qi)__T7, \
+		  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __m128i __T9 = _mm_##op (__T7, __T8); \
+  __m128i __T10 = (__m128i)__builtin_shufflevector ((__v16qi)__T9, \
+		  (__v16qi)__T9, \
+		  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+  __v16qi __T11 = (__v16qi)_mm_##op (__T9, __T10); \
+  return __T11[0]
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_max_epi8 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_max_epu8 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_min_epi8 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_min_epu8 (__m256i __V)
+{
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
+}
+
 #ifdef __DISABLE_AVX2__
 #undef __DISABLE_AVX2__
 #pragma GCC pop_options
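
The macros above implement a log2(N) tree reduction: each
__builtin_shufflevector step copies the upper half of the still-live
lanes over the lower half, the operator combines the two halves, and
the number of live lanes halves until lane 0 holds the result.  A
scalar model of the same idea (illustrative only, not part of the
patch):

  /* Fold a power-of-two count of elements the way
     _MM_REDUCE_OPERATOR_BASIC_EPI16 does, with + as the operator.
     The array is mutated in place, mirroring the temporaries.  */
  static short
  tree_reduce_add (short *a, int n)
  {
    for (int half = n / 2; half >= 1; half /= 2)
      for (int i = 0; i < half; i++)
        a[i] = a[i] + a[i + half];  /* lanes >= half become don't-care */
    return a[0];
  }
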
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 9d2aba2a8ff..3654cf214c4 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -4730,6 +4730,262 @@ _mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 						  (__mmask16) __M);
 }
 
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_add_epi16 (__mmask8 __M, __m128i __W)
+{
+  __W = _mm_maskz_mov_epi16 (__M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (+);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_mul_epi16 (__mmask8 __M, __m128i __W)
+{
+  __W = _mm_mask_mov_epi16 (_mm_set1_epi16 (1), __M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (*);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_and_epi16 (__mmask8 __M, __m128i __W)
+{
+  __W = _mm_mask_mov_epi16 (_mm_set1_epi16 (-1), __M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (&);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_or_epi16 (__mmask8 __M, __m128i __W)
+{
+  __W = _mm_maskz_mov_epi16 (__M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI16 (|);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_max_epi16 (__mmask8 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (-32767-1), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_max_epu16 (__mmask8 __M, __m128i __V)
+{
+  __V = _mm_maskz_mov_epi16 (__M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_min_epi16 (__mmask8 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (32767), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_min_epu16 (__mmask8 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (-1), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_add_epi16 (__mmask16 __M, __m256i __W)
+{
+  __W = _mm256_maskz_mov_epi16 (__M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (+);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_mul_epi16 (__mmask16 __M, __m256i __W)
+{
+  __W = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (1), __M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (*);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_and_epi16 (__mmask16 __M, __m256i __W)
+{
+  __W = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-1), __M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (&);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_or_epi16 (__mmask16 __M, __m256i __W)
+{
+  __W = _mm256_maskz_mov_epi16 (__M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI16 (|);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_max_epi16 (__mmask16 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-32767-1), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_max_epu16 (__mmask16 __M, __m256i __V)
+{
+  __V = _mm256_maskz_mov_epi16 (__M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_min_epi16 (__mmask16 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (32767), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
+}
+
+extern __inline unsigned short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_min_epu16 (__mmask16 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-1), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_add_epi8 (__mmask16 __M, __m128i __W)
+{
+  __W = _mm_maskz_mov_epi8 (__M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (+);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_mul_epi8 (__mmask16 __M, __m128i __W)
+{
+  __W = _mm_mask_mov_epi8 (_mm_set1_epi8 (1), __M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (*);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_and_epi8 (__mmask16 __M, __m128i __W)
+{
+  __W = _mm_mask_mov_epi8 (_mm_set1_epi8 (-1), __M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (&);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_or_epi8 (__mmask16 __M, __m128i __W)
+{
+  __W = _mm_maskz_mov_epi8 (__M, __W);
+  _MM_REDUCE_OPERATOR_BASIC_EPI8 (|);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_max_epi8 (__mmask16 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (-127-1), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_max_epu8 (__mmask16 __M, __m128i __V)
+{
+  __V = _mm_maskz_mov_epi8 (__M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_min_epi8 (__mmask16 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (127), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_min_epu8 (__mmask16 __M, __m128i __V)
+{
+  __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (-1), __M, __V);
+  _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_add_epi8 (__mmask32 __M, __m256i __W)
+{
+  __W = _mm256_maskz_mov_epi8 (__M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (+);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_mul_epi8 (__mmask32 __M, __m256i __W)
+{
+  __W = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (1), __M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (*);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_and_epi8 (__mmask32 __M, __m256i __W)
+{
+  __W = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-1), __M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (&);
+}
+
+extern __inline char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_or_epi8 (__mmask32 __M, __m256i __W)
+{
+  __W = _mm256_maskz_mov_epi8 (__M, __W);
+  _MM256_REDUCE_OPERATOR_BASIC_EPI8 (|);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_max_epi8 (__mmask32 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-127-1), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_max_epu8 (__mmask32 __M, __m256i __V)
+{
+  __V = _mm256_maskz_mov_epi8 (__M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
+}
+
+extern __inline signed char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_min_epi8 (__mmask32 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (127), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_min_epu8 (__mmask32 __M, __m256i __V)
+{
+  __V = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-1), __M, __V);
+  _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
+}
+
 #ifdef __DISABLE_AVX512VLBW__
 #undef __DISABLE_AVX512VLBW__
 #pragma GCC pop_options
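
Each masked variant first neutralizes the inactive lanes (zero for
add/or via maskz_mov; one for mul, all-ones for and, and the type's
extreme value for min/max via mask_mov of an identity broadcast), then
falls through to the unmasked reduction macro.  A sketch of the
resulting semantics (values are illustrative):

  #include <immintrin.h>

  /* With mask 0x0f only the low four lanes participate; masked-off
     lanes are replaced by the identity for add (0), so the result is
     1 + 2 + 3 + 4 = 10.  Requires -mavx512bw -mavx512vl.  */
  short
  masked_sum_low_half (void)
  {
    __m128i v = _mm_setr_epi16 (1, 2, 3, 4, 100, 100, 100, 100);
    return _mm_mask_reduce_add_epi16 ((__mmask8) 0x0f, v);
  }
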
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c
new file mode 100644
index 00000000000..146ef6bf8da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c
@@ -0,0 +1,206 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512BW
+#define AVX512VL
+
+#include "avx512f-helper.h"
+
+#define FUNC_TEST_REDUCE_BASIC(opname) \
+  FUNC_TEST_REDUCE_OP (, short, epi16, opname, __m128i, __mmask8) \
+  FUNC_TEST_REDUCE_OP (256, short, epi16, opname, __m256i,  __mmask16) \
+  FUNC_TEST_REDUCE_OP (, char, epi8, opname, __m128i, __mmask16) \
+  FUNC_TEST_REDUCE_OP (256, char, epi8, opname, __m256i,  __mmask32)
+
+#define FUNC_TEST_REDUCE_MAX_MIN(opname) \
+  FUNC_TEST_REDUCE_OP (, short, epi16, opname, __m128i, __mmask8) \
+  FUNC_TEST_REDUCE_OP (256, short, epi16, opname, __m256i,  __mmask16) \
+  FUNC_TEST_REDUCE_OP (, char, epi8, opname, __m128i, __mmask16) \
+  FUNC_TEST_REDUCE_OP (256, char, epi8, opname, __m256i,  __mmask32) \
+  FUNC_TEST_REDUCE_OP (, unsigned short, epu16, opname, __m128i, __mmask8) \
+  FUNC_TEST_REDUCE_OP (256, unsigned short, epu16, \
+		       opname, __m256i,  __mmask16) \
+  FUNC_TEST_REDUCE_OP (, unsigned char, epu8, opname, __m128i, __mmask16) \
+  FUNC_TEST_REDUCE_OP (256, unsigned char, epu8, opname, __m256i,  __mmask32)
+
+#define FUNC_TEST_REDUCE_OP(len, rtype, type, opname, argtype, masktype) \
+  __attribute__((noinline, noclone)) rtype \
+  test_##len##_reduce_##opname##_##type (argtype a) \
+  { \
+    return _mm##len##_reduce_##opname##_##type (a); \
+  } \
+  __attribute__((noinline, noclone)) rtype \
+  test_##len##_mask_reduce_##opname##_##type (masktype u, argtype a) \
+  { \
+    return _mm##len##_mask_reduce_##opname##_##type (u, a); \
+  }
+
+FUNC_TEST_REDUCE_BASIC (add)
+FUNC_TEST_REDUCE_BASIC (mul)
+FUNC_TEST_REDUCE_BASIC (and)
+FUNC_TEST_REDUCE_BASIC (or)
+FUNC_TEST_REDUCE_MAX_MIN (max)
+FUNC_TEST_REDUCE_MAX_MIN (min)
+
+#define TESTOP(len, opname, op, type, suffix, neutral) \
+  do {									\
+    type r1 = _mm##len##_reduce_##opname##_##suffix (v.x);			\
+    type r2 = test_##len##_reduce_##opname##_##suffix (v.x);			\
+    type r3 = neutral;							\
+    if (r1 != r2)							\
+      __builtin_abort ();						\
+    for (int i = 0; i < SIZE; i++)					\
+      r3 = r3 op v.a[i];						\
+    if (r1 != r3)							\
+      __builtin_abort ();						\
+    type r4 = _mm##len##_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x);	\
+    type r5 = test_##len##_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x);	\
+    if (r4 != r5)							\
+      __builtin_abort ();						\
+    r3 = neutral;							\
+    for (int i = 0; i < SIZE; i++)					\
+      if (MASK_VALUE & (1 << i))					\
+	r3 = r3 op v.a[i];						\
+    if (r4 != r3)							\
+      __builtin_abort ();						\
+    type r6 = _mm##len##_mask_reduce_##opname##_##suffix (0, v.x);		\
+    type r7 = test_##len##_mask_reduce_##opname##_##suffix (0, v.x);		\
+    if (r6 != r7 || r6 != neutral)					\
+      __builtin_abort ();						\
+  } while (0)
+
+#undef AVX512F_LEN
+#define AVX512F_LEN 128
+
+#undef SIZE
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+#define TEST_128_EPI8(c1, c2, c3, c4, c5, c6, c7, c8, \
+		   c9, c10, c11, c12, c13, c14, c15, c16)		\
+  do {									\
+    UNION_TYPE (AVX512F_LEN, i_b) v;					\
+    v.x = _mm_set_epi8 (c1, c2, c3, c4, c5, c6, c7, c8,		\
+			    c9, c10, c11, c12, c13, c14, c15, c16);	\
+    TESTOP (, add, +, char, epi8, 0);					\
+    TESTOP (, mul, *, char, epi8, 1);					\
+    TESTOP (, and, &, char, epi8, (char) ~0);					\
+    TESTOP (, or, |, char, epi8, 0);					\
+    TESTOP (, min, < v.a[i] ? r3 :, char, epi8, __SCHAR_MAX__);		\
+    TESTOP (, max, > v.a[i] ? r3 :, char, epi8, -__SCHAR_MAX__ - 1);	\
+    TESTOP (, min, < (unsigned char) v.a[i] ? r3 :, unsigned char, epu8, (unsigned char) ~0U);	\
+    TESTOP (, max, > (unsigned char) v.a[i] ? r3 :, unsigned char, epu8, 0); \
+  } while (0)
+
+static void
+test_128_epi8 (void)
+{
+  TEST_128_EPI8 (1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4);
+  TEST_128_EPI8 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6);
+}
+
+#undef SIZE
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#define TEST_128_EPI16(c1, c2, c3, c4, c5, c6, c7, c8) \
+  do {									\
+    UNION_TYPE (AVX512F_LEN, i_w) v;					\
+    v.x = _mm_set_epi16 (c1, c2, c3, c4, c5, c6, c7, c8);		\
+    TESTOP (, add, +, short, epi16, 0);					\
+    TESTOP (, mul, *, short, epi16, 1);					\
+    TESTOP (, and, &, short, epi16, (short) ~0);			\
+    TESTOP (, or, |, short, epi16, 0);					\
+    TESTOP (, min, < v.a[i] ? r3 :, short, epi16, __SHRT_MAX__);	\
+    TESTOP (, max, > v.a[i] ? r3 :, short, epi16, -__SHRT_MAX__ - 1);	\
+    TESTOP (, min, < (unsigned short) v.a[i] ? r3 :, unsigned short, epu16, (unsigned short) ~0U);	\
+    TESTOP (, max, > (unsigned short) v.a[i] ? r3 :, unsigned short, epu16, 0);	\
+  } while (0)
+
+static void
+test_128_epi16 (void)
+{
+  TEST_128_EPI16 (1, 2, 3, 4, 5, 6, 6, 5);
+  TEST_128_EPI16 (-1, 15, -1, 7, -1, 7, -1, -1);
+}
+
+void
+test_128 (void)
+{
+  test_128_epi8 ();
+  test_128_epi16 ();
+}
+
+#undef AVX512F_LEN
+#define AVX512F_LEN 256
+
+#undef SIZE
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+#define TEST_256_EPI8(c1, c2, c3, c4, c5, c6, c7, c8, \
+		   c9, c10, c11, c12, c13, c14, c15, c16,		\
+		   c17, c18, c19, c20, c21, c22, c23, c24,		\
+		   c25, c26, c27, c28, c29, c30, c31, c32)		\
+  do {									\
+    UNION_TYPE (AVX512F_LEN, i_b) v;					\
+    v.x = _mm256_set_epi8 (c1, c2, c3, c4, c5, c6, c7, c8,		\
+			    c9, c10, c11, c12, c13, c14, c15, c16,	\
+			    c17, c18, c19, c20, c21, c22, c23, c24,	\
+			    c25, c26, c27, c28, c29, c30, c31, c32);	\
+    TESTOP (256, add, +, char, epi8, 0);				\
+    TESTOP (256, mul, *, char, epi8, 1);				\
+    TESTOP (256, and, &, char, epi8, (char) ~0);			\
+    TESTOP (256, or, |, char, epi8, 0);					\
+    TESTOP (256, min, < v.a[i] ? r3 :, char, epi8, __SCHAR_MAX__);	\
+    TESTOP (256, max, > v.a[i] ? r3 :, char, epi8, -__SCHAR_MAX__ - 1);	\
+    TESTOP (256, min, < (unsigned char) v.a[i] ? r3 :,			\
+	    unsigned char, epu8, (unsigned char)~0U);			\
+    TESTOP (256, max, > (unsigned char) v.a[i] ? r3 :,			\
+	    unsigned char, epu8, 0);	\
+  } while (0)
+
+static void
+test_256_epi8 (void)
+{
+  TEST_256_EPI8 (1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 12, 11, 10, 9, 9, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 7, 10, 11, 12);
+  TEST_256_EPI8 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6, -1, 30, -1, 28, -1, 26, -1, 24, -1, 22, -1, -1, -1, -1, 17, 16);
+}
+
+#undef SIZE
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#define TEST_256_EPI16(c1, c2, c3, c4, c5, c6, c7, c8, \
+		   c9, c10, c11, c12, c13, c14, c15, c16)		\
+  do {									\
+    UNION_TYPE (AVX512F_LEN, i_w) v;					\
+    v.x = _mm256_set_epi16 (c1, c2, c3, c4, c5, c6, c7, c8,		\
+			    c9, c10, c11, c12, c13, c14, c15, c16);	\
+    TESTOP (256, add, +, short, epi16, 0);				\
+    TESTOP (256, mul, *, short, epi16, 1);				\
+    TESTOP (256, and, &, short, epi16, (short) ~0);			\
+    TESTOP (256, or, |, short, epi16, 0);				\
+    TESTOP (256, min, < v.a[i] ? r3 :, short, epi16, __SHRT_MAX__);	\
+    TESTOP (256, max, > v.a[i] ? r3 :, short, epi16, -__SHRT_MAX__ - 1);\
+    TESTOP (256, min, < (unsigned short) v.a[i] ? r3 :,			\
+	    unsigned short, epu16, (unsigned short) ~0U);		\
+    TESTOP (256, max, > (unsigned short) v.a[i] ? r3 :,			\
+	    unsigned short, epu16, 0);					\
+  } while (0)
+
+static void
+test_256_epi16 (void)
+{
+  TEST_256_EPI16 (9, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 7, 10, 11, 12);
+  TEST_256_EPI16 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6);
+}
+
+void
+test_256 (void)
+{
+  test_256_epi8 ();
+  test_256_epi16 ();
+}
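
Note how the min/max TESTOP invocations splice a partial expression in
as the operator: with op given as "< v.a[i] ? r3 :", the loop body
"r3 = r3 op v.a[i];" preprocesses into a scalar minimum.  The reference
computation is therefore equivalent to (illustrative sketch):

  /* Scalar reference for the epi16 min case, starting from the
     'neutral' argument __SHRT_MAX__.  */
  static short
  reference_min (const short *a, int n)
  {
    short r3 = __SHRT_MAX__;
    for (int i = 0; i < n; i++)
      r3 = r3 < a[i] ? r3 : a[i];
    return r3;
  }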
