[PATCH] MIPS: Support more cases with alien mode of SHF.DF

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: YunQiang Su <syq@gcc.gnu.org>
To: gcc-patches@gcc.gnu.org
Cc: YunQiang Su <syq@gcc.gnu.org>
Subject: [PATCH] MIPS: Support more cases with alien mode of SHF.DF
Date: Fri, 28 Jun 2024 10:19:31 +0800	[thread overview]
Message-ID: <20240628021931.76303-1-syq@gcc.gnu.org> (raw)

Currently, we support the cases that strictly fit for the instructions.
For example, for V16QImode, we only support shuffle like
(0<=N0, N1, N2, N3<=3 here)
	N0,	N1,	N2,	N3
	N0+4	N1+4	N2+4,	N3+4
	N0+8	N1+8	N2+8,	N3+8
	N0+12	N1+12	N2+12,	N3+12

While in fact we can support more cases to try use other SHF.DF
instructions not strictly fitting the mode.

1) We can use SHF.H to support more cases for V16QImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
	M0	M0+1,	M1,	M1+1
	M2	M2+1,	M3,	M3+1
	M0+8	M0+9,	M1+8,	M1+9
	M2+8	M2+9,	M3+8,	M3+9

2) We can use SHF.W to support some cases for V16QImode:
(M0/M1/M2/M3 are 0 or 4 or 8 or 12)
	M0,	M0+1,	M0+2,	M0+3
	M1,	M1+1,	M1+2,	M1+3
	M2,	M2+1,	M2+2,	M2+3
	M3,	M3+1,	M3+2,	M3+3

3) We can use SHF.W to support some cases for V8HImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
	M0,	M0+1
	M1,	M1+1
	M2,	M2+1
	M3,	M3+1

4) We can also use SHF.W to swap the 2 parts of V2DF or V2DI.

gcc
	* config/mips/mips-protos.h: New function mips_msa_shf_i8.
	* config/mips/mips.cc(mips_const_vector_shuffle_set_p):
	Support more cases try to use alien mode instruction;
	(mips_msa_shf_i8): New function to get the correct MSA SHF
	instruction and IMM.
---
 gcc/config/mips/mips-msa.md   |  35 ++++----
 gcc/config/mips/mips-protos.h |   1 +
 gcc/config/mips/mips.cc       | 149 ++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 15 deletions(-)

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 0081b688ce9..377c63f0d35 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -125,9 +125,6 @@ (define_mode_iterator IMSA_WH  [V4SI V8HI])
 ;; Only floating-point modes.
 (define_mode_iterator FMSA     [V2DF V4SF])
 
-;; Only used for immediate set shuffle elements instruction.
-(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF])
-
 ;; The attribute gives the integer vector mode with same size.
 (define_mode_attr VIMODE
   [(V2DF "V2DI")
@@ -2520,21 +2517,29 @@ (define_insn "msa_sat_u_<msafmt>"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "msa_shf_<msafmt_f>"
-  [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f")
-	(vec_select:MSA_WHB_W
-	  (match_operand:MSA_WHB_W 1 "register_operand" "f")
+  [(set (match_operand:MSA 0 "register_operand" "=f")
+	(vec_select:MSA
+	  (match_operand:MSA 1 "register_operand" "f")
 	  (match_operand 2 "par_const_vector_shf_set_operand" "")))]
   "ISA_HAS_MSA"
 {
-  HOST_WIDE_INT val = 0;
-  unsigned int i;
-
-  /* We convert the selection to an immediate.  */
-  for (i = 0; i < 4; i++)
-    val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i);
-
-  operands[2] = GEN_INT (val);
-  return "shf.<msafmt>\t%w0,%w1,%X2";
+  HOST_WIDE_INT rval = mips_msa_shf_i8 (operands);
+  /* 0b11100100 means that there is no shf needed at all.  This RTL
+     should be optimized out in some pass.  */
+  if ((rval & 0xff) == 0xe4)
+    gcc_unreachable ();
+  operands[2] = GEN_INT (rval & 0xff);
+  switch (rval & 0xff00)
+  {
+  default: gcc_unreachable ();
+  case 0x400:
+    return "shf.w\t%w0,%w1,%X2";
+  case 0x200:
+    return "shf.h\t%w0,%w1,%X2";
+  case 0x100:
+    return "shf.b\t%w0,%w1,%X2";
+  }
+  gcc_unreachable ();
 }
   [(set_attr "type" "simd_shf")
    (set_attr "mode" "<MODE>")])
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 75f80984c03..90b4c87fdea 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -387,6 +387,7 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
 extern void mips_register_frame_header_opt (void);
 extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *, bool);
 extern void mips_expand_vec_cmp_expr (rtx *);
+extern HOST_WIDE_INT mips_msa_shf_i8 (rtx *);
 
 extern void mips_emit_speculation_barrier_function (void);
 
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 7d4791157d1..6c797b62164 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -2079,6 +2079,72 @@ mips_const_vector_shuffle_set_p (rtx op, machine_mode mode)
   int nsets = nunits / 4;
   int set = 0;
   int i, j;
+  int val[4];
+  bool ok;
+
+  /* We support swapping 2 Doubleword part with shf.w.  */
+  if (ISA_HAS_MSA && (mode == V2DFmode || mode == V2DImode))
+    {
+      if (!IN_RANGE (INTVAL (XVECEXP (op, 0, 0)), 0, 1)
+	  || !IN_RANGE (INTVAL (XVECEXP (op, 0, 1)), 0, 1))
+	return false;
+    }
+
+  if (ISA_HAS_MSA && mode == V16QImode)
+    {
+     /* We can use shf.w if the elements are in-order inner 32bit.  */
+      ok = true;
+      for (j = 0; j < 4; j++)
+	{
+	  val[0] = INTVAL (XVECEXP (op, 0, j * 4));
+	  val[1] = INTVAL (XVECEXP (op, 0, j * 4 + 1));
+	  val[2] = INTVAL (XVECEXP (op, 0, j * 4 + 2));
+	  val[3] = INTVAL (XVECEXP (op, 0, j * 4 + 3));
+	  if (val[0] != val[1] - 1
+	      || val[1] != val[2] - 1
+	      || val[2] != val[3] - 1)
+	    ok = false;
+	  if (val[0] != 0 && val[0] != 4 && val[0] != 8 && val[0] != 12)
+	    ok = false;
+	}
+      if (ok)
+	return ok;
+
+      /* We can use shf.h if the elements are in order inner 16bit.  */
+      ok = true;
+      for (j = 0; j < 4; j++)
+	{
+	  val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+	  val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+	  val[2] = INTVAL (XVECEXP (op, 0, j * 2 + 8));
+	  val[3] = INTVAL (XVECEXP (op, 0, j * 2 + 1 + 8));
+	  if (val[0] != val[1] - 1 || val[2] != val[3] - 1)
+	    ok = false;
+	  if (val[0] != val[2] - 8 || val[1] != val[3] - 8)
+	    ok = false;
+	  if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+	    ok = false;
+	}
+      if (ok)
+	return ok;
+    }
+
+  if (ISA_HAS_MSA && mode == V8HImode)
+    {
+     /* We can use shf.w if the elements are in-order inner 32bit.  */
+      ok = true;
+      for (j = 0; j < 4; j++)
+	{
+	  val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+	  val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+	  if (val[0] != val[1] - 1)
+	    ok = false;
+	  if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+	    ok = false;
+	}
+      if (ok)
+	return ok;
+    }
 
   /* Check if we have the same 4-element sets.  */
   for (j = 0; j < nsets; j++, set = 4 * j)
@@ -22304,6 +22370,89 @@ mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p)
   return gen_rtx_PARALLEL (VOIDmode, v);
 }
 
+/* Construct and return i8 of SHF.df.  No error will happen since tt has
+   been constrained by mips_const_vector_shuffle_set_p.
+   Return (IMM | (INSN << 8)):  The range of IMM is [0, 0xFF].
+   The INSN can be 0 (error)/1 (SHF.B)/2 (SHF.H)/4 (SHF.W).  */
+
+HOST_WIDE_INT
+mips_msa_shf_i8 (rtx *operands)
+{
+  HOST_WIDE_INT rval = 0, val[16];
+  unsigned int i;
+  machine_mode mode = GET_MODE (operands[0]);
+  int which_op = 0;
+
+  /* We use shf.w to swap 2 doubleword part.  */
+  if (mode == V2DImode || mode == V2DFmode)
+    {
+      val[0] = INTVAL (XVECEXP (operands[2], 0, 0));
+      val[1] = INTVAL (XVECEXP (operands[2], 0, 1));
+      val[3] = val[1] == 0 ? 1 : 3;
+      val[2] = val[1] == 0 ? 0 : 2;
+      val[1] = val[0] == 0 ? 1 : 3;
+      val[0] = val[0] == 0 ? 0 : 2;
+      which_op = 4;
+    }
+  else if (mode == V16QImode)
+    {
+      for (i = 0; i < 16; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      if (val[1] - val[0] == 1
+	  && val[2] - val[1] == 1
+	  && val[3] - val[2] == 1)
+	{
+	  which_op = 4;
+	  val[0] = val[0] / 4;
+	  val[1] = val[4] / 4;
+	  val[2] = val[8] / 4;
+	  val[3] = val[12] / 4;
+	}
+      else if (val[1] - val[0] == 1
+	  && val[3] - val[2] == 1)
+	{
+	  which_op = 2;
+	  val[0] = val[0] / 2;
+	  val[1] = val[2] / 2;
+	  val[2] = val[4] / 2;
+	  val[3] = val[6] / 2;
+	}
+      else
+	which_op = 1;
+    }
+  else if (mode == V8HImode)
+    {
+      for (i = 0; i < 8; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      if (val[1] - val[0] == 1
+	  && val[3] - val[2] == 1
+	  && val[5] - val[4] == 1
+	  && val[7] - val[6] == 1)
+	{
+	  which_op = 4;
+	  val[0] = val[0] / 2;
+	  val[1] = val[2] / 2;
+	  val[2] = val[4] / 2;
+	  val[3] = val[6] / 2;
+	}
+      else
+	which_op = 2;
+    }
+  else if (mode == V4SImode || mode == V4SFmode)
+    {
+      for (i = 0; i < 4; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      which_op = 4;
+    }
+
+  /* We convert the selection to an immediate.  */
+  for (i = 0; i < 4; i++)
+    rval |= val[i] << (2 * i);
+
+  rval |= (which_op << 8);
+  return rval;
+}
+
 /* A subroutine of mips_expand_vec_init, match constant vector elements.  */
 
 static inline bool
-- 
2.39.3 (Apple Git-146)

                 reply	other threads:[~2024-06-28  2:19 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240628021931.76303-1-syq@gcc.gnu.org \
    --to=syq@gcc.gnu.org \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).