public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Sandiford <richard.sandiford@linaro.org>
To: gcc-patches@gcc.gnu.org
Subject: Re: [09/13] Use explicit encodings for simple permutes
Date: Tue, 19 Dec 2017 20:37:00 -0000	[thread overview]
Message-ID: <878tdy4fpp.fsf@linaro.org> (raw)
In-Reply-To: <87d13nlc6v.fsf@linaro.org> (Richard Sandiford's message of "Sat,	09 Dec 2017 23:21:44 +0000")

Ping

Richard Sandiford <richard.sandiford@linaro.org> writes:
> This patch makes users of vec_perm_builders use the compressed encoding
> where possible.  This means that they work with variable-length vectors.
>
>
> 2017-12-09  Richard Sandiford  <richard.sandiford@linaro.org>
>
> gcc/
> 	* optabs.c (expand_vec_perm_var): Use an explicit encoding for
> 	the broadcast of the low byte.
> 	(expand_mult_highpart): Use an explicit encoding for the permutes.
> 	* optabs-query.c (can_mult_highpart_p): Likewise.
> 	* tree-vect-loop.c (calc_vec_perm_mask_for_shift): Likewise.
> 	* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
> 	(vectorizable_bswap): Likewise.
> 	* tree-vect-data-refs.c (vect_grouped_store_supported): Use an
> 	explicit encoding for the power-of-2 permutes.
> 	(vect_permute_store_chain): Likewise.
> 	(vect_grouped_load_supported): Likewise.
> 	(vect_permute_load_chain): Likewise.
>
> Index: gcc/optabs.c
> ===================================================================
> --- gcc/optabs.c	2017-12-09 22:48:47.546825312 +0000
> +++ gcc/optabs.c	2017-12-09 22:48:52.266015836 +0000
> @@ -5625,15 +5625,14 @@ expand_vec_perm_var (machine_mode mode,
>  			       NULL, 0, OPTAB_DIRECT);
>    gcc_assert (sel != NULL);
>  
> -  /* Broadcast the low byte each element into each of its bytes.  */
> -  vec_perm_builder const_sel (w, w, 1);
> -  for (i = 0; i < w; ++i)
> -    {
> -      int this_e = i / u * u;
> -      if (BYTES_BIG_ENDIAN)
> -	this_e += u - 1;
> -      const_sel.quick_push (this_e);
> -    }
> +  /* Broadcast the low byte of each element into each of its bytes.
> +     The encoding has U interleaved stepped patterns, one for each
> +     byte of an element.  */
> +  vec_perm_builder const_sel (w, u, 3);
> +  unsigned int low_byte_in_u = BYTES_BIG_ENDIAN ? u - 1 : 0;
> +  for (i = 0; i < 3; ++i)
> +    for (unsigned int j = 0; j < u; ++j)
> +      const_sel.quick_push (i * u + low_byte_in_u);
>    sel = gen_lowpart (qimode, sel);
>    sel = expand_vec_perm_const (qimode, sel, sel, const_sel, qimode, NULL);
>    gcc_assert (sel != NULL);
> @@ -5853,16 +5852,20 @@ expand_mult_highpart (machine_mode mode,
>    expand_insn (optab_handler (tab2, mode), 3, eops);
>    m2 = gen_lowpart (mode, eops[0].value);
>  
> -  vec_perm_builder sel (nunits, nunits, 1);
> +  vec_perm_builder sel;
>    if (method == 2)
>      {
> -      for (i = 0; i < nunits; ++i)
> +      /* The encoding has 2 interleaved stepped patterns.  */
> +      sel.new_vector (nunits, 2, 3);
> +      for (i = 0; i < 6; ++i)
>  	sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1)
>  			+ ((i & 1) ? nunits : 0));
>      }
>    else
>      {
> -      for (i = 0; i < nunits; ++i)
> +      /* The encoding has a single stepped pattern.  */
> +      sel.new_vector (nunits, 1, 3);
> +      for (i = 0; i < 3; ++i)
>  	sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
>      }
>  
> Index: gcc/optabs-query.c
> ===================================================================
> --- gcc/optabs-query.c	2017-12-09 22:48:47.545825268 +0000
> +++ gcc/optabs-query.c	2017-12-09 22:48:52.265015799 +0000
> @@ -501,8 +501,9 @@ can_mult_highpart_p (machine_mode mode,
>        op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
>        if (optab_handler (op, mode) != CODE_FOR_nothing)
>  	{
> -	  vec_perm_builder sel (nunits, nunits, 1);
> -	  for (i = 0; i < nunits; ++i)
> +	  /* The encoding has 2 interleaved stepped patterns.  */
> +	  vec_perm_builder sel (nunits, 2, 3);
> +	  for (i = 0; i < 6; ++i)
>  	    sel.quick_push (!BYTES_BIG_ENDIAN
>  			    + (i & ~1)
>  			    + ((i & 1) ? nunits : 0));
> @@ -518,8 +519,9 @@ can_mult_highpart_p (machine_mode mode,
>        op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
>        if (optab_handler (op, mode) != CODE_FOR_nothing)
>  	{
> -	  vec_perm_builder sel (nunits, nunits, 1);
> -	  for (i = 0; i < nunits; ++i)
> +	  /* The encoding has a single stepped pattern.  */
> +	  vec_perm_builder sel (nunits, 1, 3);
> +	  for (int i = 0; i < 3; ++i)
>  	    sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
>  	  vec_perm_indices indices (sel, 2, nunits);
>  	  if (can_vec_perm_const_p (mode, indices))
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c	2017-12-09 22:48:47.547825355 +0000
> +++ gcc/tree-vect-loop.c	2017-12-09 22:48:52.267015873 +0000
> @@ -3716,8 +3716,10 @@ vect_estimate_min_profitable_iters (loop
>  calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
>  			      vec_perm_builder *sel)
>  {
> -  sel->new_vector (nelt, nelt, 1);
> -  for (unsigned int i = 0; i < nelt; i++)
> +  /* The encoding is a single stepped pattern.  Any wrap-around is handled
> +     by vec_perm_indices.  */
> +  sel->new_vector (nelt, 1, 3);
> +  for (unsigned int i = 0; i < 3; i++)
>      sel->quick_push (i + offset);
>  }
>  
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c	2017-12-09 22:48:50.360942531 +0000
> +++ gcc/tree-vect-stmts.c	2017-12-09 22:48:52.268015910 +0000
> @@ -1717,8 +1717,9 @@ perm_mask_for_reverse (tree vectype)
>  
>    nunits = TYPE_VECTOR_SUBPARTS (vectype);
>  
> -  vec_perm_builder sel (nunits, nunits, 1);
> -  for (i = 0; i < nunits; ++i)
> +  /* The encoding has a single stepped pattern.  */
> +  vec_perm_builder sel (nunits, 1, 3);
> +  for (i = 0; i < 3; ++i)
>      sel.quick_push (nunits - 1 - i);
>  
>    vec_perm_indices indices (sel, 1, nunits);
> @@ -2504,8 +2505,9 @@ vectorizable_bswap (gimple *stmt, gimple
>    unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
>    unsigned word_bytes = num_bytes / nunits;
>  
> -  vec_perm_builder elts (num_bytes, num_bytes, 1);
> -  for (unsigned i = 0; i < nunits; ++i)
> +  /* The encoding uses one stepped pattern for each byte in the word.  */
> +  vec_perm_builder elts (num_bytes, word_bytes, 3);
> +  for (unsigned i = 0; i < 3; ++i)
>      for (unsigned j = 0; j < word_bytes; ++j)
>        elts.quick_push ((i + 1) * word_bytes - j - 1);
>  
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c	2017-12-09 22:48:47.546825312 +0000
> +++ gcc/tree-vect-data-refs.c	2017-12-09 22:48:52.267015873 +0000
> @@ -4566,14 +4566,13 @@ vect_grouped_store_supported (tree vecty
>    if (VECTOR_MODE_P (mode))
>      {
>        unsigned int i, nelt = GET_MODE_NUNITS (mode);
> -      vec_perm_builder sel (nelt, nelt, 1);
> -      sel.quick_grow (nelt);
> -
>        if (count == 3)
>  	{
>  	  unsigned int j0 = 0, j1 = 0, j2 = 0;
>  	  unsigned int i, j;
>  
> +	  vec_perm_builder sel (nelt, nelt, 1);
> +	  sel.quick_grow (nelt);
>  	  vec_perm_indices indices;
>  	  for (j = 0; j < 3; j++)
>  	    {
> @@ -4623,7 +4622,10 @@ vect_grouped_store_supported (tree vecty
>  	  /* If length is not equal to 3 then only power of 2 is supported.  */
>  	  gcc_assert (pow2p_hwi (count));
>  
> -	  for (i = 0; i < nelt / 2; i++)
> +	  /* The encoding has 2 interleaved stepped patterns.  */
> +	  vec_perm_builder sel (nelt, 2, 3);
> +	  sel.quick_grow (6);
> +	  for (i = 0; i < 3; i++)
>  	    {
>  	      sel[i * 2] = i;
>  	      sel[i * 2 + 1] = i + nelt;
> @@ -4631,7 +4633,7 @@ vect_grouped_store_supported (tree vecty
>  	  vec_perm_indices indices (sel, 2, nelt);
>  	  if (can_vec_perm_const_p (mode, indices))
>  	    {
> -	      for (i = 0; i < nelt; i++)
> +	      for (i = 0; i < 6; i++)
>  		sel[i] += nelt / 2;
>  	      indices.new_vector (sel, 2, nelt);
>  	      if (can_vec_perm_const_p (mode, indices))
> @@ -4736,9 +4738,6 @@ vect_permute_store_chain (vec<tree> dr_c
>    unsigned int i, n, log_length = exact_log2 (length);
>    unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
>  
> -  vec_perm_builder sel (nelt, nelt, 1);
> -  sel.quick_grow (nelt);
> -
>    result_chain->quick_grow (length);
>    memcpy (result_chain->address (), dr_chain.address (),
>  	  length * sizeof (tree));
> @@ -4747,6 +4746,8 @@ vect_permute_store_chain (vec<tree> dr_c
>      {
>        unsigned int j0 = 0, j1 = 0, j2 = 0;
>  
> +      vec_perm_builder sel (nelt, nelt, 1);
> +      sel.quick_grow (nelt);
>        vec_perm_indices indices;
>        for (j = 0; j < 3; j++)
>          {
> @@ -4808,7 +4809,10 @@ vect_permute_store_chain (vec<tree> dr_c
>        /* If length is not equal to 3 then only power of 2 is supported.  */
>        gcc_assert (pow2p_hwi (length));
>  
> -      for (i = 0, n = nelt / 2; i < n; i++)
> +      /* The encoding has 2 interleaved stepped patterns.  */
> +      vec_perm_builder sel (nelt, 2, 3);
> +      sel.quick_grow (6);
> +      for (i = 0; i < 3; i++)
>  	{
>  	  sel[i * 2] = i;
>  	  sel[i * 2 + 1] = i + nelt;
> @@ -4816,7 +4820,7 @@ vect_permute_store_chain (vec<tree> dr_c
>  	vec_perm_indices indices (sel, 2, nelt);
>  	perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
>  
> -	for (i = 0; i < nelt; i++)
> +	for (i = 0; i < 6; i++)
>  	  sel[i] += nelt / 2;
>  	indices.new_vector (sel, 2, nelt);
>  	perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
> @@ -5164,11 +5168,11 @@ vect_grouped_load_supported (tree vectyp
>    if (VECTOR_MODE_P (mode))
>      {
>        unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
> -      vec_perm_builder sel (nelt, nelt, 1);
> -      sel.quick_grow (nelt);
>  
>        if (count == 3)
>  	{
> +	  vec_perm_builder sel (nelt, nelt, 1);
> +	  sel.quick_grow (nelt);
>  	  vec_perm_indices indices;
>  	  unsigned int k;
>  	  for (k = 0; k < 3; k++)
> @@ -5209,12 +5213,15 @@ vect_grouped_load_supported (tree vectyp
>  	  /* If length is not equal to 3 then only power of 2 is supported.  */
>  	  gcc_assert (pow2p_hwi (count));
>  
> -	  for (i = 0; i < nelt; i++)
> +	  /* The encoding has a single stepped pattern.  */
> +	  vec_perm_builder sel (nelt, 1, 3);
> +	  sel.quick_grow (3);
> +	  for (i = 0; i < 3; i++)
>  	    sel[i] = i * 2;
>  	  vec_perm_indices indices (sel, 2, nelt);
>  	  if (can_vec_perm_const_p (mode, indices))
>  	    {
> -	      for (i = 0; i < nelt; i++)
> +	      for (i = 0; i < 3; i++)
>  		sel[i] = i * 2 + 1;
>  	      indices.new_vector (sel, 2, nelt);
>  	      if (can_vec_perm_const_p (mode, indices))
> @@ -5332,9 +5339,6 @@ vect_permute_load_chain (vec<tree> dr_ch
>    unsigned int i, j, log_length = exact_log2 (length);
>    unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
>  
> -  vec_perm_builder sel (nelt, nelt, 1);
> -  sel.quick_grow (nelt);
> -
>    result_chain->quick_grow (length);
>    memcpy (result_chain->address (), dr_chain.address (),
>  	  length * sizeof (tree));
> @@ -5343,6 +5347,8 @@ vect_permute_load_chain (vec<tree> dr_ch
>      {
>        unsigned int k;
>  
> +      vec_perm_builder sel (nelt, nelt, 1);
> +      sel.quick_grow (nelt);
>        vec_perm_indices indices;
>        for (k = 0; k < 3; k++)
>  	{
> @@ -5390,12 +5396,15 @@ vect_permute_load_chain (vec<tree> dr_ch
>        /* If length is not equal to 3 then only power of 2 is supported.  */
>        gcc_assert (pow2p_hwi (length));
>  
> -      for (i = 0; i < nelt; ++i)
> +      /* The encoding has a single stepped pattern.  */
> +      vec_perm_builder sel (nelt, 1, 3);
> +      sel.quick_grow (3);
> +      for (i = 0; i < 3; ++i)
>  	sel[i] = i * 2;
>        vec_perm_indices indices (sel, 2, nelt);
>        perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
>  
> -      for (i = 0; i < nelt; ++i)
> +      for (i = 0; i < 3; ++i)
>  	sel[i] = i * 2 + 1;
>        indices.new_vector (sel, 2, nelt);
>        perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);

  reply	other threads:[~2017-12-19 20:37 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-09 23:06 [00/13] Make VEC_PERM_EXPR work for variable-length vectors Richard Sandiford
2017-12-09 23:08 ` [01/13] Add a qimode_for_vec_perm helper function Richard Sandiford
2017-12-18 13:34   ` Richard Biener
2017-12-09 23:09 ` [02/13] Pass vec_perm_indices by reference Richard Sandiford
2017-12-12 14:23   ` Richard Biener
2017-12-09 23:11 ` [03/13] Split can_vec_perm_p into can_vec_perm_{var,const}_p Richard Sandiford
2017-12-12 14:25   ` Richard Biener
2017-12-09 23:13 ` [04/13] Refactor expand_vec_perm Richard Sandiford
2017-12-12 15:17   ` Richard Biener
2017-12-09 23:17 ` [05/13] Remove vec_perm_const optab Richard Sandiford
2017-12-12 15:26   ` Richard Biener
2017-12-20 13:42     ` Richard Sandiford
2017-12-09 23:18 ` [06/13] Check whether a vector of QIs can store all indices Richard Sandiford
2017-12-12 15:27   ` Richard Biener
2017-12-09 23:20 ` [08/13] Add a vec_perm_indices_to_tree helper function Richard Sandiford
2017-12-18 13:34   ` Richard Biener
2017-12-09 23:20 ` [07/13] Make vec_perm_indices use new vector encoding Richard Sandiford
2017-12-12 15:32   ` Richard Biener
2017-12-12 15:47     ` Richard Sandiford
2017-12-14 10:37       ` Richard Biener
2017-12-20 13:48         ` Richard Sandiford
2018-01-02 13:15           ` Richard Biener
2018-01-02 18:30             ` Richard Sandiford
2017-12-09 23:21 ` [09/13] Use explicit encodings for simple permutes Richard Sandiford
2017-12-19 20:37   ` Richard Sandiford [this message]
2018-01-02 13:07   ` Richard Biener
2017-12-09 23:23 ` [10/13] Rework VEC_PERM_EXPR folding Richard Sandiford
2017-12-09 23:24   ` [11/13] Use vec_perm_builder::series_p in shift_amt_for_vec_perm_mask Richard Sandiford
2017-12-19 20:37     ` Richard Sandiford
2018-01-02 13:08     ` Richard Biener
2017-12-09 23:25   ` [12/13] Use ssizetype selectors for autovectorised VEC_PERM_EXPRs Richard Sandiford
2017-12-19 20:37     ` Richard Sandiford
2018-01-02 13:09     ` Richard Biener
2017-12-19 20:37   ` [10/13] Rework VEC_PERM_EXPR folding Richard Sandiford
2018-01-02 13:08   ` Richard Biener
2017-12-09 23:27 ` [13/13] [AArch64] Use vec_perm_indices helper routines Richard Sandiford
2017-12-19 20:37   ` Richard Sandiford
2018-01-04 11:28     ` Richard Sandiford
2018-01-09 12:18       ` James Greenhalgh
2018-01-09 16:24         ` RFA: Expand vec_perm_indices::series_p comment Richard Sandiford
2018-01-29 20:56           ` Ping: " Richard Sandiford
2018-01-30  7:20             ` Jeff Law
2017-12-12 14:12 ` [00/13] Make VEC_PERM_EXPR work for variable-length vectors Richard Biener
2017-12-12 15:32   ` Richard Sandiford
2017-12-12 15:38     ` Richard Biener
2017-12-12 15:57       ` Richard Sandiford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878tdy4fpp.fsf@linaro.org \
    --to=richard.sandiford@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).