Re: [AArch64_be] Fix vtbl[34] and vtbx4

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: James Greenhalgh <james.greenhalgh@arm.com>
To: Christophe Lyon <christophe.lyon@linaro.org>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: Re: [AArch64_be] Fix vtbl[34] and vtbx4
Date: Wed, 07 Oct 2015 15:09:00 -0000	[thread overview]
Message-ID: <20151007150941.GA31205@arm.com> (raw)
In-Reply-To: <CAKdteObt_dP63aqn3eH6mHiK5zXP+Y_rL+DfN55D=WfK_4cVGw@mail.gmail.com>

On Tue, Sep 15, 2015 at 05:25:25PM +0100, Christophe Lyon wrote:
> This patch re-implements vtbl[34] and vtbx4 AdvSIMD intrinsics using
> existing builtins, and fixes the behaviour on aarch64_be.
> 
> Tested on aarch64_be-none-elf and aarch64-none-elf using the Foundation Model.
> 
> OK?

Hi Christophe,

Sorry for the delay getting back to you, comments below.

> 2015-09-15  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	* config/aarch64/aarch64-builtins.c
> 	(aarch64_types_tbl_qualifiers): New static data.
> 	(TYPES_TBL): Define.
> 	* config/aarch64/aarch64-simd-builtins.def: Update builtins
> 	tables.
> 	* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): New.
> 	* config/aarch64/arm_neon.h (vtbl3_s8, vtbl3_u8, vtbl3_p8)
> 	(vtbl4_s8, vtbl4_u8, vtbl4_p8): Rewrite using builtin functions.
> 	(vtbx4_s8, vtbx4_u8, vtbx4_p8): Emulate behaviour using other
> 	intrinsics.
> 	* config/aarch64/iterators.md (V8Q): New.

> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index 0f4f2b9..7ca3917 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -253,6 +253,11 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>        qualifier_none, qualifier_struct_load_store_lane_index };
>  #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
>  
> +static enum aarch64_type_qualifiers
> +aarch64_types_tbl_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_none, qualifier_none };
> +#define TYPES_TBL (aarch64_types_tbl_qualifiers)
> +

Do we need these? This looks like TYPES_BINOP (the predicate on the
instruction pattern will prevent the "qualifier_maybe_immediate" from
becoming a problem).

>  #define CF0(N, X) CODE_FOR_aarch64_##N##X
>  #define CF1(N, X) CODE_FOR_##N##X##1
>  #define CF2(N, X) CODE_FOR_##N##X##2
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index d0f298a..62f1b13 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -405,3 +405,5 @@
>    VAR1 (BINOPP, crypto_pmull, 0, di)
>    VAR1 (BINOPP, crypto_pmull, 0, v2di)
>  
> +  /* Implemented by aarch64_tbl3v8qi.  */
> +  BUILTIN_V8Q (TBL, tbl3, 0)

This can be:

  VAR1 (BINOP, tbl3, 0, v8qi)

It would be good if we could eliminate the casts in arm_neon.h by also
defining a  "BINOPU" version of this, but I imagine that gets stuck on the
types accepted by __builtin_aarch64_set_qregoiv16qi - so don't worry about
making that change.


> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 9777418..84a61d5 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4716,6 +4714,16 @@
>    [(set_attr "type" "neon_tbl2_q")]
>  )
>  
> +(define_insn "aarch64_tbl3v8qi"
> +  [(set (match_operand:V8QI 0 "register_operand" "=w")
> +	(unspec:V8QI [(match_operand:OI 1 "register_operand" "w")
> +		      (match_operand:V8QI 2 "register_operand" "w")]
> +		      UNSPEC_TBL))]
> +  "TARGET_SIMD"
> +  "tbl\\t%S0.8b, {%S1.16b - %T1.16b}, %S2.8b"
> +  [(set_attr "type" "neon_tbl3")]
> +)
> +
>  (define_insn_and_split "aarch64_combinev16qi"
>    [(set (match_operand:OI 0 "register_operand" "=w")
>  	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 87bbf6e..91704de 100644
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 6dfebe7..e8ee318 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
>  /* End of temporary inline asm.  */
>  
>  /* Start of optimal implementations in approved order.  */
> @@ -23221,6 +23182,36 @@ vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
>    return vbsl_p8 (__mask, __tbl, __r);
>  }
>  
> +/* vtbx4  */
> +
> +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
> +vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
> +{
> +  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
> +			      vmov_n_u8 (32));
> +  int8x8_t __tbl = vtbl4_s8 (__tab, __idx);
> +
> +  return vbsl_s8 (__mask, __tbl, __r);
> +}
> +
> +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
> +vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
> +{
> +  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (32));
> +  uint8x8_t __tbl = vtbl4_u8 (__tab, __idx);
> +
> +  return vbsl_u8 (__mask, __tbl, __r);
> +}
> +
> +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
> +vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
> +{
> +  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (32));
> +  poly8x8_t __tbl = vtbl4_p8 (__tab, __idx);
> +
> +  return vbsl_p8 (__mask, __tbl, __r);
> +}
> +

Why do we want this for vtbx4 rather than putting out a VTBX instruction
directly (as in the inline asm versions you replace)?

This sequence does make sense for vtbx3.

>  /* vtrn */
>  
>  __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index b8a45d1..dfbd9cd 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -100,6 +100,8 @@
>  ;; All modes.
>  (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
>  
> +(define_mode_iterator V8Q [V8QI])
> +

This can be dropped if you use VAR1 in aarch64-builtins.c.

Thanks for working on this, with your patch applied, the only
remaining intrinsics I see failing for aarch64_be are:

  vqtbl2_*8
  vqtbl2q_*8
  vqtbl3_*8
  vqtbl3q_*8
  vqtbl4_*8
  vqtbl4q_*8

  vqtbx2_*8
  vqtbx2q_*8
  vqtbx3_*8
  vqtbx3q_*8
  vqtbx4_*8
  vqtbx4q_*8

Thanks,
James

next prev parent reply	other threads:[~2015-10-07 15:09 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-15 16:25 Christophe Lyon
2015-09-29 21:26 ` Christophe Lyon
2015-10-07  9:24   ` Christophe Lyon
2015-10-07 15:09 ` James Greenhalgh [this message]
2015-10-07 20:07   ` Christophe Lyon
2015-10-08  9:12     ` James Greenhalgh
2015-10-09 16:16       ` Christophe Lyon
2015-10-12 13:30         ` James Greenhalgh
2015-10-13 13:05           ` Christophe Lyon
2015-10-13 13:08             ` James Greenhalgh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151007150941.GA31205@arm.com \
    --to=james.greenhalgh@arm.com \
    --cc=christophe.lyon@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).