* [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
@ 2022-04-22 8:10 Hongyu Wang
2022-04-22 8:49 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Hongyu Wang @ 2022-04-22 8:10 UTC (permalink / raw)
To: hongtao.liu; +Cc: gcc-patches
Hi,
Add missing macros under -O0 and adjust macro format for scalef
intrinsics.
Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for master and backport to GCC 9/10/11?
gcc/ChangeLog:
PR target/105339
* config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
Add parentheses for parameters and adjust format.
(_mm512_mask_scalef_round_pd): Ditto.
(_mm512_maskz_scalef_round_pd): Ditto.
(_mm512_scalef_round_ps): Ditto.
(_mm512_mask_scalef_round_ps): Ditto.
(_mm512_maskz_scalef_round_ps): Ditto.
(_mm_scalef_round_sd): Use _mm_undefined_pd.
(_mm_scalef_round_ss): Use _mm_undefined_ps.
(_mm_mask_scalef_round_sd): New macro.
(_mm_mask_scalef_round_ss): Ditto.
(_mm_maskz_scalef_round_sd): Ditto.
(_mm_maskz_scalef_round_ss): Ditto.
---
gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
1 file changed, 56 insertions(+), 20 deletions(-)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..6dc69ff0234 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__mmask8) __U, __R);
}
#else
-#define _mm512_scalef_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_undefined_pd(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_setzero_pd(), \
+ (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_undefined_ps(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_setzero_ps(), \
+ (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_undefined_pd (), \
+ -1, (C)))
-#define _mm512_scalef_round_ps(A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_undefined_ps (), \
+ -1, (C)))
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
-#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
-#define _mm_scalef_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
- (__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_setzero_pd (), \
+ (U), (C)))
-#define _mm_scalef_round_ss(A, B, C) \
- (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
- (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_setzero_ps (), \
+ (U), (C)))
#endif
#define _mm_mask_scalef_sd(W, U, A, B) \
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
2022-04-22 8:10 [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] Hongyu Wang
@ 2022-04-22 8:49 ` Hongtao Liu
2022-04-22 12:38 ` Hongyu Wang
0 siblings, 1 reply; 4+ messages in thread
From: Hongtao Liu @ 2022-04-22 8:49 UTC (permalink / raw)
To: Hongyu Wang; +Cc: Liu, Hongtao, GCC Patches
On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> Add missing macros under -O0 and adjust macro format for scalef
> intrinsics.
>
Please add the corresponding intrinsic test in sse-14.c.
> Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
>
> Ok for master and backport to GCC 9/10/11?
>
> gcc/ChangeLog:
>
> PR target/105339
> * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> Add parentheses for parameters and adjust format.
> (_mm512_mask_scalef_round_pd): Ditto.
> (_mm512_maskz_scalef_round_pd): Ditto.
> (_mm512_scalef_round_ps): Ditto.
> (_mm512_mask_scalef_round_ps): Ditto.
> (_mm512_maskz_scalef_round_ps): Ditto.
> (_mm_scalef_round_sd): Use _mm_undefined_pd.
> (_mm_scalef_round_ss): Use _mm_undefined_ps.
> (_mm_mask_scalef_round_sd): New macro.
> (_mm_mask_scalef_round_ss): Ditto.
> (_mm_maskz_scalef_round_sd): Ditto.
> (_mm_maskz_scalef_round_ss): Ditto.
> ---
> gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> 1 file changed, 56 insertions(+), 20 deletions(-)
>
> diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> index 29511fd2831..6dc69ff0234 100644
> --- a/gcc/config/i386/avx512fintrin.h
> +++ b/gcc/config/i386/avx512fintrin.h
> @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> (__mmask8) __U, __R);
> }
> #else
> -#define _mm512_scalef_round_pd(A, B, C) \
> - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> -
> -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> -
> -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> +#define _mm512_scalef_round_pd(A, B, C) \
> + ((__m512d) \
> + __builtin_ia32_scalefpd512_mask((A), (B), \
> + (__v8df) _mm512_undefined_pd(), \
> + -1, (C)))
> +
> +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> +
> +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> + ((__m512d) \
> + __builtin_ia32_scalefpd512_mask((A), (B), \
> + (__v8df) _mm512_setzero_pd(), \
> + (U), (C)))
> +
> +#define _mm512_scalef_round_ps(A, B, C) \
> + ((__m512) \
> + __builtin_ia32_scalefps512_mask((A), (B), \
> + (__v16sf) _mm512_undefined_ps(), \
> + -1, (C)))
> +
> +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> +
> +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> + ((__m512) \
> + __builtin_ia32_scalefps512_mask((A), (B), \
> + (__v16sf) _mm512_setzero_ps(), \
> + (U), (C)))
> +
> +#define _mm_scalef_round_sd(A, B, C) \
> + ((__m128d) \
> + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> + (__v2df) _mm_undefined_pd (), \
> + -1, (C)))
>
> -#define _mm512_scalef_round_ps(A, B, C) \
> - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> +#define _mm_scalef_round_ss(A, B, C) \
> + ((__m128) \
> + __builtin_ia32_scalefss_mask_round ((A), (B), \
> + (__v4sf) _mm_undefined_ps (), \
> + -1, (C)))
>
> -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> - (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
> + ((__m128d) \
> + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
>
> -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
> + ((__m128) \
> + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
>
> -#define _mm_scalef_round_sd(A, B, C) \
> - (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> - (__v2df)_mm_setzero_pd (), -1, C)
> +#define _mm_maskz_scalef_round_sd(U, A, B, C) \
> + ((__m128d) \
> + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> + (__v2df) _mm_setzero_pd (), \
> + (U), (C)))
>
> -#define _mm_scalef_round_ss(A, B, C) \
> - (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> - (__v4sf)_mm_setzero_ps (), -1, C)
> +#define _mm_maskz_scalef_round_ss(U, A, B, C) \
> + ((__m128) \
> + __builtin_ia32_scalefss_mask_round ((A), (B), \
> + (__v4sf) _mm_setzero_ps (), \
> + (U), (C)))
> #endif
>
> #define _mm_mask_scalef_sd(W, U, A, B) \
> --
> 2.18.1
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
2022-04-22 8:49 ` Hongtao Liu
@ 2022-04-22 12:38 ` Hongyu Wang
2022-04-24 2:35 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Hongyu Wang @ 2022-04-22 12:38 UTC (permalink / raw)
To: Hongtao Liu; +Cc: Hongyu Wang, Liu, Hongtao, GCC Patches
[-- Attachment #1: Type: text/plain, Size: 7068 bytes --]
> Please add the corresponding intrinsic test in sse-14.c
Sorry for forgetting this part. Updated patch. Thanks.
Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2022年4月22日周五 16:49写道:
>
> On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi,
> >
> > Add missing macros under -O0 and adjust macro format for scalef
> > intrinsics.
> >
> Please add the corresponding intrinsic test in sse-14.c.
> > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
> >
> > Ok for master and backport to GCC 9/10/11?
> >
> > gcc/ChangeLog:
> >
> > PR target/105339
> > * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> > Add parentheses for parameters and adjust format.
> > (_mm512_mask_scalef_round_pd): Ditto.
> > (_mm512_maskz_scalef_round_pd): Ditto.
> > (_mm512_scalef_round_ps): Ditto.
> > (_mm512_mask_scalef_round_ps): Ditto.
> > (_mm512_maskz_scalef_round_ps): Ditto.
> > (_mm_scalef_round_sd): Use _mm_undefined_pd.
> > (_mm_scalef_round_ss): Use _mm_undefined_ps.
> > (_mm_mask_scalef_round_sd): New macro.
> > (_mm_mask_scalef_round_ss): Ditto.
> > (_mm_maskz_scalef_round_sd): Ditto.
> > (_mm_maskz_scalef_round_ss): Ditto.
> > ---
> > gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> > 1 file changed, 56 insertions(+), 20 deletions(-)
> >
> > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> > index 29511fd2831..6dc69ff0234 100644
> > --- a/gcc/config/i386/avx512fintrin.h
> > +++ b/gcc/config/i386/avx512fintrin.h
> > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> > (__mmask8) __U, __R);
> > }
> > #else
> > -#define _mm512_scalef_round_pd(A, B, C) \
> > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> > -
> > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> > -
> > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> > +#define _mm512_scalef_round_pd(A, B, C) \
> > + ((__m512d) \
> > + __builtin_ia32_scalefpd512_mask((A), (B), \
> > + (__v8df) _mm512_undefined_pd(), \
> > + -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > + ((__m512d) \
> > + __builtin_ia32_scalefpd512_mask((A), (B), \
> > + (__v8df) _mm512_setzero_pd(), \
> > + (U), (C)))
> > +
> > +#define _mm512_scalef_round_ps(A, B, C) \
> > + ((__m512) \
> > + __builtin_ia32_scalefps512_mask((A), (B), \
> > + (__v16sf) _mm512_undefined_ps(), \
> > + -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > + ((__m512) \
> > + __builtin_ia32_scalefps512_mask((A), (B), \
> > + (__v16sf) _mm512_setzero_ps(), \
> > + (U), (C)))
> > +
> > +#define _mm_scalef_round_sd(A, B, C) \
> > + ((__m128d) \
> > + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > + (__v2df) _mm_undefined_pd (), \
> > + -1, (C)))
> >
> > -#define _mm512_scalef_round_ps(A, B, C) \
> > - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> > +#define _mm_scalef_round_ss(A, B, C) \
> > + ((__m128) \
> > + __builtin_ia32_scalefss_mask_round ((A), (B), \
> > + (__v4sf) _mm_undefined_ps (), \
> > + -1, (C)))
> >
> > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > - (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> > +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
> > + ((__m128d) \
> > + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> > +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
> > + ((__m128) \
> > + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm_scalef_round_sd(A, B, C) \
> > - (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> > - (__v2df)_mm_setzero_pd (), -1, C)
> > +#define _mm_maskz_scalef_round_sd(U, A, B, C) \
> > + ((__m128d) \
> > + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > + (__v2df) _mm_setzero_pd (), \
> > + (U), (C)))
> >
> > -#define _mm_scalef_round_ss(A, B, C) \
> > - (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> > - (__v4sf)_mm_setzero_ps (), -1, C)
> > +#define _mm_maskz_scalef_round_ss(U, A, B, C) \
> > + ((__m128) \
> > + __builtin_ia32_scalefss_mask_round ((A), (B), \
> > + (__v4sf) _mm_setzero_ps (), \
> > + (U), (C)))
> > #endif
> >
> > #define _mm_mask_scalef_sd(W, U, A, B) \
> > --
> > 2.18.1
> >
>
>
> --
> BR,
> Hongtao
[-- Attachment #2: 0001-AVX512F-Add-missing-macro-for-mask-z-_scalf_s-sd-PR-.patch --]
[-- Type: text/x-patch, Size: 6553 bytes --]
From 93be6b95b3237e1bff61001031ed6ad733c5de3f Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.wang@intel.com>
Date: Fri, 22 Apr 2022 14:42:30 +0800
Subject: [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR
105339]
Add missing macros under -O0 and adjust macro format for scalef
intrinsics.
gcc/ChangeLog:
PR target/105339
* config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
Add parentheses for parameters and adjust format.
(_mm512_mask_scalef_round_pd): Ditto.
(_mm512_maskz_scalef_round_pd): Ditto.
(_mm512_scalef_round_ps): Ditto.
(_mm512_mask_scalef_round_ps): Ditto.
(_mm512_maskz_scalef_round_ps): Ditto.
(_mm_scalef_round_sd): Use _mm_undefined_pd.
(_mm_scalef_round_ss): Use _mm_undefined_ps.
(_mm_mask_scalef_round_sd): New macro.
(_mm_mask_scalef_round_ss): Ditto.
(_mm_maskz_scalef_round_sd): Ditto.
(_mm_maskz_scalef_round_ss): Ditto.
gcc/testsuite/ChangeLog:
PR target/105339
* gcc.target/i386/sse-14.c: Add tests for new macro.
---
gcc/config/i386/avx512fintrin.h | 76 +++++++++++++++++++-------
gcc/testsuite/gcc.target/i386/sse-14.c | 4 ++
2 files changed, 60 insertions(+), 20 deletions(-)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..77d6249c2bc 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__mmask8) __U, __R);
}
#else
-#define _mm512_scalef_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_undefined_pd(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_setzero_pd(), \
+ (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_undefined_ps(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_setzero_ps(), \
+ (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_undefined_pd (), \
+ -1, (C)))
-#define _mm512_scalef_round_ps(A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_undefined_ps (), \
+ -1, (C)))
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
-#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
-#define _mm_scalef_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
- (__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_setzero_pd (), \
+ (U), (C)))
-#define _mm_scalef_round_ss(A, B, C) \
- (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
- (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_setzero_ps (), \
+ (U), (C)))
#endif
#define _mm_mask_scalef_sd(W, U, A, B) \
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 956a9d16f84..f41493b93f3 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -430,7 +430,9 @@ test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9)
test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 9)
test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
@@ -545,7 +547,9 @@ test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
2022-04-22 12:38 ` Hongyu Wang
@ 2022-04-24 2:35 ` Hongtao Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2022-04-24 2:35 UTC (permalink / raw)
To: Hongyu Wang; +Cc: Hongyu Wang, Liu, Hongtao, GCC Patches
On Fri, Apr 22, 2022 at 8:43 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
>
> > Please add the corresponding intrinsic test in sse-14.c
>
> Sorry for forgetting this part. Updated patch. Thanks.
>
LGTM.
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2022年4月22日周五 16:49写道:
> >
> > On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Hi,
> > >
> > > Add missing macros under -O0 and adjust macro format for scalef
> > > intrinsics.
> > >
> > Please add the corresponding intrinsic test in sse-14.c.
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
> > >
> > > Ok for master and backport to GCC 9/10/11?
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/105339
> > > * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> > > Add parentheses for parameters and adjust format.
> > > (_mm512_mask_scalef_round_pd): Ditto.
> > > (_mm512_maskz_scalef_round_pd): Ditto.
> > > (_mm512_scalef_round_ps): Ditto.
> > > (_mm512_mask_scalef_round_ps): Ditto.
> > > (_mm512_maskz_scalef_round_ps): Ditto.
> > > (_mm_scalef_round_sd): Use _mm_undefined_pd.
> > > (_mm_scalef_round_ss): Use _mm_undefined_ps.
> > > (_mm_mask_scalef_round_sd): New macro.
> > > (_mm_mask_scalef_round_ss): Ditto.
> > > (_mm_maskz_scalef_round_sd): Ditto.
> > > (_mm_maskz_scalef_round_ss): Ditto.
> > > ---
> > > gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> > > 1 file changed, 56 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> > > index 29511fd2831..6dc69ff0234 100644
> > > --- a/gcc/config/i386/avx512fintrin.h
> > > +++ b/gcc/config/i386/avx512fintrin.h
> > > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> > > (__mmask8) __U, __R);
> > > }
> > > #else
> > > -#define _mm512_scalef_round_pd(A, B, C) \
> > > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> > > -
> > > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> > > -
> > > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> > > +#define _mm512_scalef_round_pd(A, B, C) \
> > > + ((__m512d) \
> > > + __builtin_ia32_scalefpd512_mask((A), (B), \
> > > + (__v8df) _mm512_undefined_pd(), \
> > > + -1, (C)))
> > > +
> > > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > > + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> > > +
> > > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > > + ((__m512d) \
> > > + __builtin_ia32_scalefpd512_mask((A), (B), \
> > > + (__v8df) _mm512_setzero_pd(), \
> > > + (U), (C)))
> > > +
> > > +#define _mm512_scalef_round_ps(A, B, C) \
> > > + ((__m512) \
> > > + __builtin_ia32_scalefps512_mask((A), (B), \
> > > + (__v16sf) _mm512_undefined_ps(), \
> > > + -1, (C)))
> > > +
> > > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > > + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> > > +
> > > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > > + ((__m512) \
> > > + __builtin_ia32_scalefps512_mask((A), (B), \
> > > + (__v16sf) _mm512_setzero_ps(), \
> > > + (U), (C)))
> > > +
> > > +#define _mm_scalef_round_sd(A, B, C) \
> > > + ((__m128d) \
> > > + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > > + (__v2df) _mm_undefined_pd (), \
> > > + -1, (C)))
> > >
> > > -#define _mm512_scalef_round_ps(A, B, C) \
> > > - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> > > +#define _mm_scalef_round_ss(A, B, C) \
> > > + ((__m128) \
> > > + __builtin_ia32_scalefss_mask_round ((A), (B), \
> > > + (__v4sf) _mm_undefined_ps (), \
> > > + -1, (C)))
> > >
> > > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > > - (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> > > +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
> > > + ((__m128d) \
> > > + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
> > >
> > > -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > > - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> > > +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
> > > + ((__m128) \
> > > + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
> > >
> > > -#define _mm_scalef_round_sd(A, B, C) \
> > > - (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> > > - (__v2df)_mm_setzero_pd (), -1, C)
> > > +#define _mm_maskz_scalef_round_sd(U, A, B, C) \
> > > + ((__m128d) \
> > > + __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > > + (__v2df) _mm_setzero_pd (), \
> > > + (U), (C)))
> > >
> > > -#define _mm_scalef_round_ss(A, B, C) \
> > > - (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> > > - (__v4sf)_mm_setzero_ps (), -1, C)
> > > +#define _mm_maskz_scalef_round_ss(U, A, B, C) \
> > > + ((__m128) \
> > > + __builtin_ia32_scalefss_mask_round ((A), (B), \
> > > + (__v4sf) _mm_setzero_ps (), \
> > > + (U), (C)))
> > > #endif
> > >
> > > #define _mm_mask_scalef_sd(W, U, A, B) \
> > > --
> > > 2.18.1
> > >
> >
> >
> > --
> > BR,
> > Hongtao
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-04-24 2:35 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-22 8:10 [PATCH] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] Hongyu Wang
2022-04-22 8:49 ` Hongtao Liu
2022-04-22 12:38 ` Hongyu Wang
2022-04-24 2:35 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).