From: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>
To: charles.baylis@linaro.org, Ramana.Radhakrishnan@arm.com,
richard.earnshaw@arm.com
Cc: rearnsha@arm.com, gcc-patches@gcc.gnu.org, michael.collison@linaro.org
Subject: Re: [PATCH 2/2] [ARM] PR68532 Fix up vzip recognition for big endian
Date: Mon, 08 Feb 2016 11:42:00 -0000 [thread overview]
Message-ID: <56B87F23.4030906@foss.arm.com> (raw)
In-Reply-To: <1454525947-14690-3-git-send-email-charles.baylis@linaro.org>
Hi Charles,
On 03/02/16 18:59, charles.baylis@linaro.org wrote:
> From: Charles Baylis <charles.baylis@linaro.org>
>
> gcc/ChangeLog:
>
> 2016-02-03 Charles Baylis <charles.baylis@linaro.org>
>
> PR target/68532
> * config/arm/arm.c (arm_evpc_neon_vzip): Allow for big endian lane
> order.
> * config/arm/arm_neon.h (vzipq_s8): Adjust shuffle patterns for big
> endian.
> (vzipq_s16): Likewise.
> (vzipq_s32): Likewise.
> (vzipq_f32): Likewise.
> (vzipq_u8): Likewise.
> (vzipq_u16): Likewise.
> (vzipq_u32): Likewise.
> (vzipq_p8): Likewise.
> (vzipq_p16): Likewise.
>
> Change-Id: I327678f5e73c1de2f413c1d22769ab42ce1d6c16
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index e9aa982..24239db 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -28318,15 +28318,21 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
> unsigned int i, high, mask, nelt = d->nelt;
> rtx out0, out1, in0, in1;
> rtx (*gen)(rtx, rtx, rtx, rtx);
> + int first_elem;
> + bool is_swapped;
>
> if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
> return false;
>
> + is_swapped = BYTES_BIG_ENDIAN ? true : false;
This is just "is_swapped = BYTES_BIG_ENDIAN;"
> +
> /* Note that these are little-endian tests. Adjust for big-endian later. */
I think you can remove this comment now, like in patch 1/2
> + first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
> +
> high = nelt / 2;
> - if (d->perm[0] == high)
> + if (first_elem == neon_endian_lane_map (d->vmode, high))
> ;
> - else if (d->perm[0] == 0)
> + else if (first_elem == neon_endian_lane_map (d->vmode, 0))
> high = 0;
> else
> return false;
> @@ -28334,11 +28340,16 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
>
> for (i = 0; i < nelt / 2; i++)
> {
> - unsigned elt = (i + high) & mask;
> - if (d->perm[i * 2] != elt)
> + unsigned elt =
> + neon_pair_endian_lane_map (d->vmode, i + high) & mask;
> + if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
> + != elt)
> return false;
> - elt = (elt + nelt) & mask;
> - if (d->perm[i * 2 + 1] != elt)
> + elt =
> + neon_pair_endian_lane_map (d->vmode, i + nelt + high)
> + & mask;
The "& mask" can go on the previous line.
> + if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
> + != elt)
> return false;
> }
>
> @@ -28362,10 +28373,9 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
>
> in0 = d->op0;
> in1 = d->op1;
> - if (BYTES_BIG_ENDIAN)
> + if (is_swapped)
> {
> std::swap (in0, in1);
> - high = !high;
> }
remove the braces around the std::swap.
Ok with these changes.
I've tried out both patch and they do fix execution failures on big-endian
and don't break any NEON intrinsics tests that I threw at them.
>
> out0 = d->target;
> diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
> index 2e014b6..aa17f49 100644
> --- a/gcc/config/arm/arm_neon.h
> +++ b/gcc/config/arm/arm_neon.h
> @@ -8453,9 +8453,9 @@ vzipq_s8 (int8x16_t __a, int8x16_t __b)
> int8x16x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
> + { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
> + { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
> @@ -8471,9 +8471,9 @@ vzipq_s16 (int16x8_t __a, int16x8_t __b)
> int16x8x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 12, 4, 13, 5, 14, 6, 15, 7 });
> + { 10, 2, 11, 3, 8, 0, 9, 1 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 8, 0, 9, 1, 10, 2, 11, 3 });
> + { 14, 6, 15, 7, 12, 4, 13, 5 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> { 0, 8, 1, 9, 2, 10, 3, 11 });
> @@ -8488,8 +8488,8 @@ vzipq_s32 (int32x4_t __a, int32x4_t __b)
> {
> int32x4x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
> - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
> + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
> + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
> @@ -8502,8 +8502,8 @@ vzipq_f32 (float32x4_t __a, float32x4_t __b)
> {
> float32x4x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
> - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
> + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
> + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
> @@ -8517,9 +8517,9 @@ vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
> uint8x16x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
> + { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
> + { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
> @@ -8535,9 +8535,9 @@ vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
> uint16x8x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 12, 4, 13, 5, 14, 6, 15, 7 });
> + { 10, 2, 11, 3, 8, 0, 9, 1 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 8, 0, 9, 1, 10, 2, 11, 3 });
> + { 14, 6, 15, 7, 12, 4, 13, 5 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> { 0, 8, 1, 9, 2, 10, 3, 11 });
> @@ -8552,8 +8552,8 @@ vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
> {
> uint32x4x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
> - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
> + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
> + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
> @@ -8567,9 +8567,9 @@ vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
> poly8x16x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
> + { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
> - { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
> + { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
> { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
> @@ -8585,9 +8585,9 @@ vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
> poly16x8x2_t __rv;
> #ifdef __ARM_BIG_ENDIAN
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 12, 4, 13, 5, 14, 6, 15, 7 });
> + { 10, 2, 11, 3, 8, 0, 9, 1 });
> __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
> - { 8, 0, 9, 1, 10, 2, 11, 3 });
> + { 14, 6, 15, 7, 12, 4, 13, 5 });
> #else
> __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
> { 0, 8, 1, 9, 2, 10, 3, 11 });
next prev parent reply other threads:[~2016-02-08 11:42 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-03 18:59 [ARM, PATCH v2 0/2] PR68532: Fix VZIP/VUZP " charles.baylis
2016-02-03 18:59 ` [PATCH 1/2] [ARM] PR68532: Fix up vuzp " charles.baylis
2016-02-08 11:42 ` Kyrill Tkachov
2016-02-09 17:01 ` Charles Baylis
2016-02-09 17:08 ` Kyrill Tkachov
2016-02-09 18:54 ` Charles Baylis
2016-02-03 18:59 ` [PATCH 2/2] [ARM] PR68532 Fix up vzip recognition " charles.baylis
2016-02-08 11:42 ` Kyrill Tkachov [this message]
2016-02-09 17:07 ` Charles Baylis
2016-02-09 18:53 ` Charles Baylis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56B87F23.4030906@foss.arm.com \
--to=kyrylo.tkachov@foss.arm.com \
--cc=Ramana.Radhakrishnan@arm.com \
--cc=charles.baylis@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=michael.collison@linaro.org \
--cc=rearnsha@arm.com \
--cc=richard.earnshaw@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).