From: YunQiang Su <syq@gcc.gnu.org>
To: gcc-patches@gcc.gnu.org
Cc: YunQiang Su <syq@gcc.gnu.org>
Subject: [PATCH] MIPS: Support more cases with alien mode of SHF.DF
Date: Fri, 28 Jun 2024 10:19:31 +0800 [thread overview]
Message-ID: <20240628021931.76303-1-syq@gcc.gnu.org> (raw)
Currently, we only support the cases that strictly fit the instructions.
For example, for V16QImode, we only support shuffle like
(0<=N0, N1, N2, N3<=3 here)
N0, N1, N2, N3
N0+4, N1+4, N2+4, N3+4
N0+8, N1+8, N2+8, N3+8
N0+12, N1+12, N2+12, N3+12
In fact, we can support more cases by trying other SHF.DF
instructions that do not strictly fit the mode.
1) We can use SHF.H to support more cases for V16QImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
M0, M0+1, M1, M1+1
M2, M2+1, M3, M3+1
M0+8, M0+9, M1+8, M1+9
M2+8, M2+9, M3+8, M3+9
2) We can use SHF.W to support some cases for V16QImode:
(M0/M1/M2/M3 are 0 or 4 or 8 or 12)
M0, M0+1, M0+2, M0+3
M1, M1+1, M1+2, M1+3
M2, M2+1, M2+2, M2+3
M3, M3+1, M3+2, M3+3
3) We can use SHF.W to support some cases for V8HImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
M0, M0+1
M1, M1+1
M2, M2+1
M3, M3+1
4) We can also use SHF.W to swap the 2 parts of V2DF or V2DI.
gcc
* config/mips/mips-protos.h (mips_msa_shf_i8): New function.
* config/mips/mips.cc (mips_const_vector_shuffle_set_p):
Support more cases by trying alien-mode instructions.
(mips_msa_shf_i8): New function to get the correct MSA SHF
instruction and IMM.
* config/mips/mips-msa.md (MSA_WHB_W): Remove.
(msa_shf_<msafmt_f>): Use the MSA iterator and mips_msa_shf_i8.
---
gcc/config/mips/mips-msa.md | 35 ++++----
gcc/config/mips/mips-protos.h | 1 +
gcc/config/mips/mips.cc | 149 ++++++++++++++++++++++++++++++++++
3 files changed, 170 insertions(+), 15 deletions(-)
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 0081b688ce9..377c63f0d35 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -125,9 +125,6 @@ (define_mode_iterator IMSA_WH [V4SI V8HI])
;; Only floating-point modes.
(define_mode_iterator FMSA [V2DF V4SF])
-;; Only used for immediate set shuffle elements instruction.
-(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF])
-
;; The attribute gives the integer vector mode with same size.
(define_mode_attr VIMODE
[(V2DF "V2DI")
@@ -2520,21 +2517,29 @@ (define_insn "msa_sat_u_<msafmt>"
(set_attr "mode" "<MODE>")])
(define_insn "msa_shf_<msafmt_f>"
- [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f")
- (vec_select:MSA_WHB_W
- (match_operand:MSA_WHB_W 1 "register_operand" "f")
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (vec_select:MSA
+ (match_operand:MSA 1 "register_operand" "f")
(match_operand 2 "par_const_vector_shf_set_operand" "")))]
"ISA_HAS_MSA"
{
- HOST_WIDE_INT val = 0;
- unsigned int i;
-
- /* We convert the selection to an immediate. */
- for (i = 0; i < 4; i++)
- val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i);
-
- operands[2] = GEN_INT (val);
- return "shf.<msafmt>\t%w0,%w1,%X2";
+ HOST_WIDE_INT rval = mips_msa_shf_i8 (operands);
+ /* 0b11100100 means that there is no shf needed at all. This RTL
+ should be optimized out in some pass. */
+ if ((rval & 0xff) == 0xe4)
+ gcc_unreachable ();
+ operands[2] = GEN_INT (rval & 0xff);
+ switch (rval & 0xff00)
+ {
+ default: gcc_unreachable ();
+ case 0x400:
+ return "shf.w\t%w0,%w1,%X2";
+ case 0x200:
+ return "shf.h\t%w0,%w1,%X2";
+ case 0x100:
+ return "shf.b\t%w0,%w1,%X2";
+ }
+ gcc_unreachable ();
}
[(set_attr "type" "simd_shf")
(set_attr "mode" "<MODE>")])
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 75f80984c03..90b4c87fdea 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -387,6 +387,7 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
extern void mips_register_frame_header_opt (void);
extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *, bool);
extern void mips_expand_vec_cmp_expr (rtx *);
+extern HOST_WIDE_INT mips_msa_shf_i8 (rtx *);
extern void mips_emit_speculation_barrier_function (void);
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 7d4791157d1..6c797b62164 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -2079,6 +2079,72 @@ mips_const_vector_shuffle_set_p (rtx op, machine_mode mode)
int nsets = nunits / 4;
int set = 0;
int i, j;
+ int val[4];
+ bool ok;
+
+ /* We support swapping 2 Doubleword part with shf.w. */
+ if (ISA_HAS_MSA && (mode == V2DFmode || mode == V2DImode))
+ {
+ if (!IN_RANGE (INTVAL (XVECEXP (op, 0, 0)), 0, 1)
+ || !IN_RANGE (INTVAL (XVECEXP (op, 0, 1)), 0, 1))
+ return false;
+ }
+
+ if (ISA_HAS_MSA && mode == V16QImode)
+ {
+ /* We can use shf.w if the elements are in-order inner 32bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 4));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 4 + 1));
+ val[2] = INTVAL (XVECEXP (op, 0, j * 4 + 2));
+ val[3] = INTVAL (XVECEXP (op, 0, j * 4 + 3));
+ if (val[0] != val[1] - 1
+ || val[1] != val[2] - 1
+ || val[2] != val[3] - 1)
+ ok = false;
+ if (val[0] != 0 && val[0] != 4 && val[0] != 8 && val[0] != 12)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+
+ /* We can use shf.h if the elements are in order inner 16bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+ val[2] = INTVAL (XVECEXP (op, 0, j * 2 + 8));
+ val[3] = INTVAL (XVECEXP (op, 0, j * 2 + 1 + 8));
+ if (val[0] != val[1] - 1 || val[2] != val[3] - 1)
+ ok = false;
+ if (val[0] != val[2] - 8 || val[1] != val[3] - 8)
+ ok = false;
+ if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+ }
+
+ if (ISA_HAS_MSA && mode == V8HImode)
+ {
+ /* We can use shf.w if the elements are in-order inner 32bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+ if (val[0] != val[1] - 1)
+ ok = false;
+ if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+ }
/* Check if we have the same 4-element sets. */
for (j = 0; j < nsets; j++, set = 4 * j)
@@ -22304,6 +22370,89 @@ mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p)
return gen_rtx_PARALLEL (VOIDmode, v);
}
+/* Return true if the NELTS element indices in VAL describe a shuffle
+   that an SHF.df working on elements RATIO times wider can perform.
+   Every aligned group of RATIO indices must name RATIO consecutive
+   elements starting at a multiple of RATIO, each set of four wide
+   elements must select from within itself, and every set must repeat
+   the selection of the first set.  */
+
+static bool
+mips_msa_shf_wider_p (const HOST_WIDE_INT *val, int nelts, int ratio)
+{
+  int nwide = nelts / ratio;
+  int g, k;
+
+  for (g = 0; g < nwide; g++)
+    {
+      HOST_WIDE_INT first = val[g * ratio];
+      HOST_WIDE_INT base = g & ~3;
+
+      /* A group that starts on a misaligned index (e.g. bytes 1,2 or
+         2,3,4,5 for a word) straddles two wide elements, so it cannot
+         be moved as one wide element.  */
+      if (first % ratio != 0)
+        return false;
+
+      for (k = 1; k < ratio; k++)
+        if (val[g * ratio + k] != first + k)
+          return false;
+
+      /* SHF.df only permutes within each set of four elements.  */
+      if (!IN_RANGE (first / ratio, base, base + 3))
+        return false;
+
+      /* The single immediate is shared by all sets of four.  */
+      if (g >= 4 && val[(g - 4) * ratio] + 4 * ratio != first)
+        return false;
+    }
+
+  return true;
+}
+
+/* Construct and return i8 of SHF.df for the shuffle described by
+   OPERANDS: operands[0] supplies the vector mode and operands[2] is the
+   PARALLEL of selector indices.  The selector is expected to have been
+   accepted by mips_const_vector_shuffle_set_p.
+   Return (IMM | (INSN << 8)): The range of IMM is [0, 0xFF].
+   The INSN can be 0 (error)/1 (SHF.B)/2 (SHF.H)/4 (SHF.W).
+   A wider instruction than the mode's own is only chosen when the whole
+   selector is verified to move aligned, consecutive groups; otherwise
+   the mode's native instruction is used with the first four indices.  */
+
+HOST_WIDE_INT
+mips_msa_shf_i8 (rtx *operands)
+{
+  HOST_WIDE_INT rval = 0, val[16];
+  unsigned int i;
+  machine_mode mode = GET_MODE (operands[0]);
+  int which_op = 0;
+
+  /* We use shf.w to swap 2 doubleword part.  */
+  if (mode == V2DImode || mode == V2DFmode)
+    {
+      HOST_WIDE_INT lo = INTVAL (XVECEXP (operands[2], 0, 0));
+      HOST_WIDE_INT hi = INTVAL (XVECEXP (operands[2], 0, 1));
+
+      /* Doubleword 0 is the word pair (0, 1), doubleword 1 is (2, 3).  */
+      val[0] = lo == 0 ? 0 : 2;
+      val[1] = lo == 0 ? 1 : 3;
+      val[2] = hi == 0 ? 0 : 2;
+      val[3] = hi == 0 ? 1 : 3;
+      which_op = 4;
+    }
+  else if (mode == V16QImode)
+    {
+      for (i = 0; i < 16; i++)
+        val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      if (mips_msa_shf_wider_p (val, 16, 4))
+        {
+          /* Whole words move: select by word index.  */
+          which_op = 4;
+          for (i = 0; i < 4; i++)
+            val[i] = val[4 * i] / 4;
+        }
+      else if (mips_msa_shf_wider_p (val, 16, 2))
+        {
+          /* Whole halfwords move: select by halfword index.  */
+          which_op = 2;
+          for (i = 0; i < 4; i++)
+            val[i] = val[2 * i] / 2;
+        }
+      else
+        which_op = 1;
+    }
+  else if (mode == V8HImode)
+    {
+      for (i = 0; i < 8; i++)
+        val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      if (mips_msa_shf_wider_p (val, 8, 2))
+        {
+          /* Whole words move: select by word index.  */
+          which_op = 4;
+          for (i = 0; i < 4; i++)
+            val[i] = val[2 * i] / 2;
+        }
+      else
+        which_op = 2;
+    }
+  else if (mode == V4SImode || mode == V4SFmode)
+    {
+      for (i = 0; i < 4; i++)
+        val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      which_op = 4;
+    }
+  else
+    /* Unsupported mode: report INSN 0 (error) without reading VAL,
+       which has not been filled in.  */
+    return 0;
+
+  /* We convert the selection to an immediate.  */
+  for (i = 0; i < 4; i++)
+    rval |= val[i] << (2 * i);
+
+  rval |= (which_op << 8);
+  return rval;
+}
+
/* A subroutine of mips_expand_vec_init, match constant vector elements. */
static inline bool
--
2.39.3 (Apple Git-146)
reply other threads:[~2024-06-28 2:19 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240628021931.76303-1-syq@gcc.gnu.org \
--to=syq@gcc.gnu.org \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).