* [PATCH] rs6000: Add Power10 optimization for most _mm_movemask*
@ 2021-10-21 17:22 Paul A. Clarke
2021-11-08 17:42 ` [PING PATCH] " Paul A. Clarke
2021-11-19 18:15 ` [PATCH] " Segher Boessenkool
0 siblings, 2 replies; 4+ messages in thread
From: Paul A. Clarke @ 2021-10-21 17:22 UTC (permalink / raw)
To: segher, gcc-patches; +Cc: wschmidt
Power10 ISA added `vextract*` instructions which are realized in the
`vec_extractm` intrinsic.
Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
`_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`.
2021-10-21 Paul A. Clarke <pc@us.ibm.com>
gcc
* config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
when _ARCH_PWR10.
* config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
(_mm_movemask_epi8): Likewise.
---
Tested on Power10 powerpc64le-linux (compiled with and without
`-mcpu=power10`).
OK for trunk?
gcc/config/rs6000/emmintrin.h | 8 ++++++++
gcc/config/rs6000/xmmintrin.h | 4 ++++
2 files changed, 12 insertions(+)
diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index 32ad72b4cc35..ab16c13c379e 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
+#ifdef _ARCH_PWR10
+ return vec_extractm ((__v2du) __A);
+#else
__vector unsigned long long result;
static const __vector unsigned int perm_mask =
{
@@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d __A)
#else
return result[0];
#endif
+#endif /* !_ARCH_PWR10 */
}
#endif /* _ARCH_PWR8 */
@@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_epi8 (__m128i __A)
{
+#ifdef _ARCH_PWR10
+ return vec_extractm ((__v16qu) __A);
+#else
__vector unsigned long long result;
static const __vector unsigned char perm_mask =
{
@@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A)
#else
return result[0];
#endif
+#endif /* !_ARCH_PWR10 */
}
#endif /* _ARCH_PWR8 */
diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index ae1a33e8d95b..4c093fd1d5ae 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_ps (__m128 __A)
{
+#ifdef _ARCH_PWR10
+ return vec_extractm ((vector unsigned int) __A);
+#else
__vector unsigned long long result;
static const __vector unsigned int perm_mask =
{
@@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128 __A)
#else
return result[0];
#endif
+#endif /* !_ARCH_PWR10 */
}
#endif /* _ARCH_PWR8 */
--
2.27.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PING PATCH] rs6000: Add Power10 optimization for most _mm_movemask*
2021-10-21 17:22 [PATCH] rs6000: Add Power10 optimization for most _mm_movemask* Paul A. Clarke
@ 2021-11-08 17:42 ` Paul A. Clarke
2021-11-19 2:26 ` [PING^2 " Paul A. Clarke
2021-11-19 18:15 ` [PATCH] " Segher Boessenkool
1 sibling, 1 reply; 4+ messages in thread
From: Paul A. Clarke @ 2021-11-08 17:42 UTC (permalink / raw)
To: segher, gcc-patches, wschmidt
Gentle ping...
On Thu, Oct 21, 2021 at 12:22:12PM -0500, Paul A. Clarke via Gcc-patches wrote:
> Power10 ISA added `vextract*` instructions which are realized in the
> `vec_extractm` intrinsic.
>
> Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
> `_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`.
>
> 2021-10-21 Paul A. Clarke <pc@us.ibm.com>
>
> gcc
> * config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
> when _ARCH_PWR10.
> * config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
> (_mm_movemask_epi8): Likewise.
> ---
> Tested on Power10 powerpc64le-linux (compiled with and without
> `-mcpu=power10`).
>
> OK for trunk?
>
> gcc/config/rs6000/emmintrin.h | 8 ++++++++
> gcc/config/rs6000/xmmintrin.h | 4 ++++
> 2 files changed, 12 insertions(+)
>
> diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
> index 32ad72b4cc35..ab16c13c379e 100644
> --- a/gcc/config/rs6000/emmintrin.h
> +++ b/gcc/config/rs6000/emmintrin.h
> @@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B)
> extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_movemask_pd (__m128d __A)
> {
> +#ifdef _ARCH_PWR10
> + return vec_extractm ((__v2du) __A);
> +#else
> __vector unsigned long long result;
> static const __vector unsigned int perm_mask =
> {
> @@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d __A)
> #else
> return result[0];
> #endif
> +#endif /* !_ARCH_PWR10 */
> }
> #endif /* _ARCH_PWR8 */
>
> @@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
> extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_movemask_epi8 (__m128i __A)
> {
> +#ifdef _ARCH_PWR10
> + return vec_extractm ((__v16qu) __A);
> +#else
> __vector unsigned long long result;
> static const __vector unsigned char perm_mask =
> {
> @@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A)
> #else
> return result[0];
> #endif
> +#endif /* !_ARCH_PWR10 */
> }
> #endif /* _ARCH_PWR8 */
>
> diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
> index ae1a33e8d95b..4c093fd1d5ae 100644
> --- a/gcc/config/rs6000/xmmintrin.h
> +++ b/gcc/config/rs6000/xmmintrin.h
> @@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A)
> extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_movemask_ps (__m128 __A)
> {
> +#ifdef _ARCH_PWR10
> + return vec_extractm ((vector unsigned int) __A);
> +#else
> __vector unsigned long long result;
> static const __vector unsigned int perm_mask =
> {
> @@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128 __A)
> #else
> return result[0];
> #endif
> +#endif /* !_ARCH_PWR10 */
> }
> #endif /* _ARCH_PWR8 */
>
> --
> 2.27.0
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PING^2 PATCH] rs6000: Add Power10 optimization for most _mm_movemask*
2021-11-08 17:42 ` [PING PATCH] " Paul A. Clarke
@ 2021-11-19 2:26 ` Paul A. Clarke
0 siblings, 0 replies; 4+ messages in thread
From: Paul A. Clarke @ 2021-11-19 2:26 UTC (permalink / raw)
To: segher, gcc-patches, wschmidt
On Mon, Nov 08, 2021 at 11:42:56AM -0600, Paul A. Clarke via Gcc-patches wrote:
> Gentle ping...
Gentle re-ping.
> On Thu, Oct 21, 2021 at 12:22:12PM -0500, Paul A. Clarke via Gcc-patches wrote:
> > Power10 ISA added `vextract*` instructions which are realized in the
> > `vec_extractm` intrinsic.
> >
> > Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
> > `_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`.
> >
> > 2021-10-21 Paul A. Clarke <pc@us.ibm.com>
> >
> > gcc
> > * config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
> > when _ARCH_PWR10.
> > * config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
> > (_mm_movemask_epi8): Likewise.
> > ---
> > Tested on Power10 powerpc64le-linux (compiled with and without
> > `-mcpu=power10`).
> >
> > OK for trunk?
> >
> > gcc/config/rs6000/emmintrin.h | 8 ++++++++
> > gcc/config/rs6000/xmmintrin.h | 4 ++++
> > 2 files changed, 12 insertions(+)
> >
> > diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
> > index 32ad72b4cc35..ab16c13c379e 100644
> > --- a/gcc/config/rs6000/emmintrin.h
> > +++ b/gcc/config/rs6000/emmintrin.h
> > @@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B)
> > extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > _mm_movemask_pd (__m128d __A)
> > {
> > +#ifdef _ARCH_PWR10
> > + return vec_extractm ((__v2du) __A);
> > +#else
> > __vector unsigned long long result;
> > static const __vector unsigned int perm_mask =
> > {
> > @@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d __A)
> > #else
> > return result[0];
> > #endif
> > +#endif /* !_ARCH_PWR10 */
> > }
> > #endif /* _ARCH_PWR8 */
> >
> > @@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
> > extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > _mm_movemask_epi8 (__m128i __A)
> > {
> > +#ifdef _ARCH_PWR10
> > + return vec_extractm ((__v16qu) __A);
> > +#else
> > __vector unsigned long long result;
> > static const __vector unsigned char perm_mask =
> > {
> > @@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A)
> > #else
> > return result[0];
> > #endif
> > +#endif /* !_ARCH_PWR10 */
> > }
> > #endif /* _ARCH_PWR8 */
> >
> > diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
> > index ae1a33e8d95b..4c093fd1d5ae 100644
> > --- a/gcc/config/rs6000/xmmintrin.h
> > +++ b/gcc/config/rs6000/xmmintrin.h
> > @@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A)
> > extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > _mm_movemask_ps (__m128 __A)
> > {
> > +#ifdef _ARCH_PWR10
> > + return vec_extractm ((vector unsigned int) __A);
> > +#else
> > __vector unsigned long long result;
> > static const __vector unsigned int perm_mask =
> > {
> > @@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128 __A)
> > #else
> > return result[0];
> > #endif
> > +#endif /* !_ARCH_PWR10 */
> > }
> > #endif /* _ARCH_PWR8 */
> >
> > --
> > 2.27.0
> >
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] rs6000: Add Power10 optimization for most _mm_movemask*
2021-10-21 17:22 [PATCH] rs6000: Add Power10 optimization for most _mm_movemask* Paul A. Clarke
2021-11-08 17:42 ` [PING PATCH] " Paul A. Clarke
@ 2021-11-19 18:15 ` Segher Boessenkool
1 sibling, 0 replies; 4+ messages in thread
From: Segher Boessenkool @ 2021-11-19 18:15 UTC (permalink / raw)
To: Paul A. Clarke; +Cc: gcc-patches, wschmidt
On Thu, Oct 21, 2021 at 12:22:12PM -0500, Paul A. Clarke wrote:
> Power10 ISA added `vextract*` instructions which are realized in the
> `vec_extractm` intrinsic.
>
> Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
> `_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`.
>
> 2021-10-21 Paul A. Clarke <pc@us.ibm.com>
>
> gcc
> * config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
> when _ARCH_PWR10.
> * config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
> (_mm_movemask_epi8): Likewise.
Okay for trunk. Thanks!
Segher
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-11-19 18:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-21 17:22 [PATCH] rs6000: Add Power10 optimization for most _mm_movemask* Paul A. Clarke
2021-11-08 17:42 ` [PING PATCH] " Paul A. Clarke
2021-11-19 2:26 ` [PING^2 " Paul A. Clarke
2021-11-19 18:15 ` [PATCH] " Segher Boessenkool
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).