public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [1/2] PR96463 - aarch64 specific changes
@ 2021-12-17 10:04 Prathamesh Kulkarni
  2021-12-17 11:33 ` Richard Sandiford
  0 siblings, 1 reply; 15+ messages in thread
From: Prathamesh Kulkarni @ 2021-12-17 10:04 UTC (permalink / raw)
  To: gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 553 bytes --]

Hi,
The patch folds:
lhs = svld1rq ({-1, -1, -1, ...}, &v[0])
into:
lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ... }>
and expands the above vec_perm_expr using aarch64_expand_sve_dupq.

With the patch, for the following test:
#include <arm_sve.h>
#include <arm_neon.h>

svint32_t
foo (int32x4_t x)
{
  return svld1rq (svptrue_b8 (), &x[0]);
}

it generates the following code:
foo:
.LFB4350:
        dup     z0.q, z0.q[0]
        ret

and passes bootstrap+test on aarch64-linux-gnu.
However, I am not sure whether the changes to aarch64_evpc_sve_tbl
are correct.

Thanks,
Prathamesh

[-- Attachment #2: pr96463-3-aarch64.txt --]
[-- Type: text/plain, Size: 3304 bytes --]

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 02e42a71e5e..e21bbec360c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1207,6 +1207,56 @@ public:
     insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
     return e.use_contiguous_load_insn (icode);
   }
+
+  gimple *
+  fold (gimple_folder &f) const OVERRIDE
+  {
+    tree arg0 = gimple_call_arg (f.call, 0);
+    tree arg1 = gimple_call_arg (f.call, 1);
+
+    /* Transform:
+       lhs = svld1rq ({-1, -1, ... }, &v[0])
+       into:
+       lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ...}>.
+       on little endian target.  */
+
+    if (!BYTES_BIG_ENDIAN
+	&& integer_all_onesp (arg0)
+	&& TREE_CODE (arg1) == ADDR_EXPR)
+      {
+	tree t = TREE_OPERAND (arg1, 0);
+	if (TREE_CODE (t) == ARRAY_REF)
+	  {
+	    tree index = TREE_OPERAND (t, 1);
+	    t = TREE_OPERAND (t, 0);
+	    if (integer_zerop (index) && TREE_CODE (t) == VIEW_CONVERT_EXPR)
+	      {
+		t = TREE_OPERAND (t, 0);
+		tree vectype = TREE_TYPE (t);
+		if (VECTOR_TYPE_P (vectype)
+		    && known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
+		    && wi::to_wide (TYPE_SIZE (vectype)) == 128)
+		  {
+		    tree lhs = gimple_call_lhs (f.call);
+		    tree lhs_type = TREE_TYPE (lhs);
+		    int source_nelts = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
+		    vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), source_nelts, 1);
+		    for (int i = 0; i < source_nelts; i++)
+		      sel.quick_push (i);
+
+		    vec_perm_indices indices (sel, 1, source_nelts);
+		    if (!can_vec_perm_const_p (TYPE_MODE (lhs_type), indices))
+		      return NULL;
+
+		    tree mask = vec_perm_indices_to_tree (lhs_type, indices);
+		    return gimple_build_assign (lhs, VEC_PERM_EXPR, t, t, mask);
+		  }
+	      }
+	  }
+      }
+
+    return NULL;
+  }
 };
 
 class svld1ro_impl : public load_replicate
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..af27f550be3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23002,8 +23002,32 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
 
   machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
   rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+
   if (d->one_vector_p)
-    emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
+    {
+      bool use_dupq = false;
+      /* Check if sel is dup vector with encoded elements {0, 1, 2, ... nelts}  */
+      if (GET_CODE (sel) == CONST_VECTOR
+	  && !GET_MODE_NUNITS (GET_MODE (sel)).is_constant ()
+	  && CONST_VECTOR_DUPLICATE_P (sel))
+	  {
+	    unsigned nelts = const_vector_encoded_nelts (sel);
+	    unsigned i;
+	    for (i = 0; i < nelts; i++)
+	      {
+		rtx elem = CONST_VECTOR_ENCODED_ELT(sel, i);
+		if (!(CONST_INT_P (elem) && INTVAL(elem) == i))
+		  break;
+	      }
+	    if (i == nelts)
+	      use_dupq = true;
+	  }
+
+      if (use_dupq)
+	aarch64_expand_sve_dupq (d->target, GET_MODE (d->target), d->op0);
+      else
+	emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
+    }
   else
     aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
   return true;

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-06-07 11:02 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-17 10:04 [1/2] PR96463 - aarch64 specific changes Prathamesh Kulkarni
2021-12-17 11:33 ` Richard Sandiford
2021-12-27 10:24   ` Prathamesh Kulkarni
2022-05-03 10:40     ` Prathamesh Kulkarni
2022-05-06 10:30       ` Richard Sandiford
2022-05-11  6:24         ` Prathamesh Kulkarni
2022-05-11  7:14           ` Richard Sandiford
2022-05-12  9:12             ` Prathamesh Kulkarni
2022-05-12 10:44               ` Richard Sandiford
2022-05-31 11:32                 ` Prathamesh Kulkarni
2022-06-01  8:42                   ` Richard Sandiford
2022-06-05 10:15                     ` Prathamesh Kulkarni
2022-06-06 10:59                       ` Richard Sandiford
2022-06-07 10:47                         ` Prathamesh Kulkarni
2022-06-07 11:02                           ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).