From: Bill Schmidt <wschmidt@linux.ibm.com>
To: "Paul A. Clarke" <pc@us.ibm.com>, gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org
Subject: Re: [PATCH v3 6/6] rs6000: Guard some x86 intrinsics implementations
Date: Fri, 27 Aug 2021 10:25:16 -0500 [thread overview]
Message-ID: <146dae52-215d-995d-a783-419b6d208a46@linux.ibm.com> (raw)
In-Reply-To: <20210823190310.1679905-7-pc@us.ibm.com>
Hi Paul,
Thanks for the changes! This looks fine to me; I recommend approval.
Thanks,
Bill
On 8/23/21 2:03 PM, Paul A. Clarke wrote:
> Some compatibility implementations of x86 intrinsics include
> Power intrinsics which require POWER8. Guard them.
>
> emmintrin.h:
> - _mm_cmpord_pd: Remove code which was ostensibly for pre-POWER8,
> but which in fact depended on POWER8 (vec_cmpgt(v2du)/vcmpgtud).
> The "POWER8" version works fine on pre-POWER8.
> - _mm_mul_epu32: vec_mule(v4su) uses vmuleuw.
> pmmintrin.h:
> - _mm_movehdup_ps: vec_mergeo(v4su) uses vmrgow.
> - _mm_moveldup_ps: vec_mergee(v4su) uses vmrgew.
> smmintrin.h:
> - _mm_cmpeq_epi64: vec_cmpeq(v2di) uses vcmpequd.
> - _mm_mul_epi32: vec_mule(v4si) uses vmulesw.
> - _mm_cmpgt_epi64: vec_cmpgt(v2di) uses vcmpgtsd.
> tmmintrin.h:
> - _mm_sign_epi8: vec_neg(v16qi) uses vsububm.
> - _mm_sign_epi16: vec_neg(v8hi) uses vsubuhm.
> - _mm_sign_epi32: vec_neg(v4si) uses vsubuwm.
> Note that the above three could actually be supported pre-POWER8,
> but current GCC does not support them before POWER8.
> - _mm_sign_pi8: depends on _mm_sign_epi8.
> - _mm_sign_pi16: depends on _mm_sign_epi16.
> - _mm_sign_pi32: depends on _mm_sign_epi32.
>
> 2021-08-20 Paul A. Clarke <pc@us.ibm.com>
>
> gcc
> PR target/101893
> * config/rs6000/emmintrin.h: Guard POWER8 intrinsics.
> * config/rs6000/pmmintrin.h: Same.
> * config/rs6000/smmintrin.h: Same.
> * config/rs6000/tmmintrin.h: Same.
> ---
> v3: No change.
> v2:
> - Ensured that each new "#ifdef _ARCH_PWR8" brackets a single impacted
> function, rather than a group of functions, per v1 review.
> - Noted testing in patch series cover letter.
> - Added PR number to commit message.
>
> gcc/config/rs6000/emmintrin.h | 12 ++----------
> gcc/config/rs6000/pmmintrin.h | 4 ++++
> gcc/config/rs6000/smmintrin.h | 4 ++++
> gcc/config/rs6000/tmmintrin.h | 12 ++++++++++++
> 4 files changed, 22 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
> index ce1287edf782..32ad72b4cc35 100644
> --- a/gcc/config/rs6000/emmintrin.h
> +++ b/gcc/config/rs6000/emmintrin.h
> @@ -430,20 +430,10 @@ _mm_cmpnge_pd (__m128d __A, __m128d __B)
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_cmpord_pd (__m128d __A, __m128d __B)
> {
> -#if _ARCH_PWR8
> __v2du c, d;
> /* Compare against self will return false (0's) if NAN. */
> c = (__v2du)vec_cmpeq (__A, __A);
> d = (__v2du)vec_cmpeq (__B, __B);
> -#else
> - __v2du a, b;
> - __v2du c, d;
> - const __v2du double_exp_mask = {0x7ff0000000000000, 0x7ff0000000000000};
> - a = (__v2du)vec_abs ((__v2df)__A);
> - b = (__v2du)vec_abs ((__v2df)__B);
> - c = (__v2du)vec_cmpgt (double_exp_mask, a);
> - d = (__v2du)vec_cmpgt (double_exp_mask, b);
> -#endif
> /* A != NAN and B != NAN. */
> return ((__m128d)vec_and(c, d));
> }
> @@ -1472,6 +1462,7 @@ _mm_mul_su32 (__m64 __A, __m64 __B)
> return ((__m64)a * (__m64)b);
> }
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_mul_epu32 (__m128i __A, __m128i __B)
> {
> @@ -1498,6 +1489,7 @@ _mm_mul_epu32 (__m128i __A, __m128i __B)
> return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B);
> #endif
> }
> +#endif
>
> extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_slli_epi16 (__m128i __A, int __B)
> diff --git a/gcc/config/rs6000/pmmintrin.h b/gcc/config/rs6000/pmmintrin.h
> index eab712fdfa66..83dff1d85666 100644
> --- a/gcc/config/rs6000/pmmintrin.h
> +++ b/gcc/config/rs6000/pmmintrin.h
> @@ -123,17 +123,21 @@ _mm_hsub_pd (__m128d __X, __m128d __Y)
> vec_mergel ((__v2df) __X, (__v2df)__Y));
> }
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_movehdup_ps (__m128 __X)
> {
> return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_moveldup_ps (__m128 __X)
> {
> return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
> }
> +#endif
>
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_loaddup_pd (double const *__P)
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index c04d2bb5b6d3..29719367e205 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -272,6 +272,7 @@ _mm_extract_ps (__m128 __X, const int __N)
> return ((__v4si)__X)[__N & 3];
> }
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
> {
> @@ -283,6 +284,7 @@ _mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
> #endif
> return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
> }
> +#endif
>
> extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
> @@ -343,6 +345,7 @@ _mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
> return (__m128d) __r;
> }
>
> +#ifdef _ARCH_PWR8
> __inline __m128d
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> _mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
> @@ -351,6 +354,7 @@ _mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
> const __vector __bool long long __boolmask = vec_cmplt ((__v2di) __mask, __zero);
> return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask);
> }
> +#endif
>
> __inline int
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> diff --git a/gcc/config/rs6000/tmmintrin.h b/gcc/config/rs6000/tmmintrin.h
> index 971511260b78..a67d88c8079a 100644
> --- a/gcc/config/rs6000/tmmintrin.h
> +++ b/gcc/config/rs6000/tmmintrin.h
> @@ -350,6 +350,7 @@ _mm_shuffle_pi8 (__m64 __A, __m64 __B)
> return (__m64) ((__v2du) (__C))[0];
> }
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128i
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_epi8 (__m128i __A, __m128i __B)
> @@ -361,7 +362,9 @@ _mm_sign_epi8 (__m128i __A, __m128i __B)
> __v16qi __conv = vec_add (__selectneg, __selectpos);
> return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128i
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_epi16 (__m128i __A, __m128i __B)
> @@ -373,7 +376,9 @@ _mm_sign_epi16 (__m128i __A, __m128i __B)
> __v8hi __conv = vec_add (__selectneg, __selectpos);
> return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m128i
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_epi32 (__m128i __A, __m128i __B)
> @@ -385,7 +390,9 @@ _mm_sign_epi32 (__m128i __A, __m128i __B)
> __v4si __conv = vec_add (__selectneg, __selectpos);
> return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m64
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_pi8 (__m64 __A, __m64 __B)
> @@ -396,7 +403,9 @@ _mm_sign_pi8 (__m64 __A, __m64 __B)
> __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
> return (__m64) ((__v2du) (__C))[0];
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m64
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_pi16 (__m64 __A, __m64 __B)
> @@ -407,7 +416,9 @@ _mm_sign_pi16 (__m64 __A, __m64 __B)
> __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
> return (__m64) ((__v2du) (__C))[0];
> }
> +#endif
>
> +#ifdef _ARCH_PWR8
> extern __inline __m64
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_sign_pi32 (__m64 __A, __m64 __B)
> @@ -418,6 +429,7 @@ _mm_sign_pi32 (__m64 __A, __m64 __B)
> __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
> return (__m64) ((__v2du) (__C))[0];
> }
> +#endif
>
> extern __inline __m128i
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
next prev parent reply other threads:[~2021-08-27 15:25 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-23 19:03 [PATCH v3 0/6] rs6000: Support more SSE4 intrinsics Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 1/6] rs6000: Support SSE4.1 "round" intrinsics Paul A. Clarke
2021-08-27 13:44 ` Bill Schmidt
2021-08-27 13:47 ` Bill Schmidt
2021-08-30 21:16 ` Paul A. Clarke
2021-08-30 21:24 ` Bill Schmidt
2021-10-07 23:08 ` Segher Boessenkool
2021-10-07 23:39 ` Segher Boessenkool
2021-10-08 1:04 ` Paul A. Clarke
2021-10-08 17:39 ` Segher Boessenkool
2021-10-08 19:27 ` Paul A. Clarke
2021-10-08 22:31 ` Segher Boessenkool
2021-10-11 13:46 ` Paul A. Clarke
2021-10-11 16:28 ` Segher Boessenkool
2021-10-11 17:31 ` Paul A. Clarke
2021-10-11 22:04 ` Segher Boessenkool
2021-10-12 19:35 ` Paul A. Clarke
2021-10-12 22:25 ` Segher Boessenkool
2021-10-19 0:36 ` Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 2/6] rs6000: Support SSE4.1 "min" and "max" intrinsics Paul A. Clarke
2021-08-27 13:47 ` Bill Schmidt
2021-10-11 19:28 ` Segher Boessenkool
2021-10-12 1:42 ` [COMMITTED v4 " Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 3/6] rs6000: Simplify some SSE4.1 "test" intrinsics Paul A. Clarke
2021-08-27 13:48 ` Bill Schmidt
2021-10-11 20:50 ` Segher Boessenkool
2021-10-12 1:47 ` [COMMITTED v4 " Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics Paul A. Clarke
2021-08-27 13:49 ` Bill Schmidt
2021-10-11 21:52 ` Segher Boessenkool
2021-10-12 1:51 ` [COMMITTED v4 " Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 5/6] rs6000: Support more SSE4 "cmp", "mul", "pack" intrinsics Paul A. Clarke
2021-08-27 15:21 ` Bill Schmidt
2021-08-27 18:52 ` Paul A. Clarke
2021-10-11 23:07 ` Segher Boessenkool
2021-10-12 1:55 ` [COMMITTED v4 " Paul A. Clarke
2021-08-23 19:03 ` [PATCH v3 6/6] rs6000: Guard some x86 intrinsics implementations Paul A. Clarke
2021-08-27 15:25 ` Bill Schmidt [this message]
2021-10-12 0:11 ` Segher Boessenkool
2021-10-13 17:04 ` Paul A. Clarke
2021-10-13 23:47 ` Segher Boessenkool
2021-10-19 0:26 ` Paul A. Clarke
2021-09-16 14:59 ` [PATCH v3 0/6] rs6000: Support more SSE4 intrinsics Paul A. Clarke
2021-10-04 18:26 ` Paul A. Clarke
2021-10-07 22:25 ` Segher Boessenkool
2021-10-08 0:29 ` Paul A. Clarke
2021-10-12 0:15 ` Segher Boessenkool
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=146dae52-215d-995d-a783-419b6d208a46@linux.ibm.com \
--to=wschmidt@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=pc@us.ibm.com \
--cc=segher@kernel.crashing.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).