public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5750] LoongArch: Optimize LSX vector shuffle on floating-point vector
@ 2023-11-22  9:09 Xi Ruoyao
  0 siblings, 0 replies; only message in thread
From: Xi Ruoyao @ 2023-11-22  9:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fce367810149580da1bb0cb0c3cd4fb00b968f1c

commit r14-5750-gfce367810149580da1bb0cb0c3cd4fb00b968f1c
Author: Xi Ruoyao <xry111@xry111.site>
Date:   Sun Nov 19 06:12:22 2023 +0800

    LoongArch: Optimize LSX vector shuffle on floating-point vector
    
    The vec_perm expander was wrongly defined.  The GCC Internals manual says:
    
    Operand 3 is the “selector”.  It is an integral mode vector of the same
    width and number of elements as mode M.
    
    But we made operand 3 in the same mode as the shuffled vectors, so it
    would be a FP mode vector if the shuffled vectors are FP mode.
    
    With this mistake, the generic code manages to work around it, but it
    ends up creating some very nasty code for a simple __builtin_shuffle (a,
    b, c) where a and b are V4SF and c is V4SI:
    
        la.local    $r12,.LANCHOR0
        la.local    $r13,.LANCHOR1
        vld $vr1,$r12,48
        vslli.w $vr1,$vr1,2
        vld $vr2,$r12,16
        vld $vr0,$r13,0
        vld $vr3,$r13,16
        vshuf.b $vr0,$vr1,$vr1,$vr0
        vld $vr1,$r12,32
        vadd.b  $vr0,$vr0,$vr3
        vandi.b $vr0,$vr0,31
        vshuf.b $vr0,$vr1,$vr2,$vr0
        vst $vr0,$r12,0
        jr  $r1
    
    This is obviously stupid.  Fix the expander definition and adjust
    loongarch_expand_vec_perm to handle it correctly.
    
    gcc/ChangeLog:
    
            * config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the
            selector VIMODE.
            * config/loongarch/loongarch.cc (loongarch_expand_vec_perm):
            Use the mode of the selector (instead of the shuffled vector)
            for truncating it.  Operate on subregs in the selector mode if
            the shuffled vector has a different mode (i.e. it's a
            floating-point vector).
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/loongarch/vect-shuf-fp.c: New test.

Diff:
---
 gcc/config/loongarch/loongarch.cc                 | 18 ++++++++++--------
 gcc/config/loongarch/lsx.md                       |  2 +-
 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index ce601a331f7..33357c670e1 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -8607,8 +8607,9 @@ void
 loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
 {
   machine_mode vmode = GET_MODE (target);
+  machine_mode vimode = GET_MODE (sel);
   auto nelt = GET_MODE_NUNITS (vmode);
-  auto round_reg = gen_reg_rtx (vmode);
+  auto round_reg = gen_reg_rtx (vimode);
   rtx round_data[MAX_VECT_LEN];
 
   for (int i = 0; i < nelt; i += 1)
@@ -8616,9 +8617,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
       round_data[i] = GEN_INT (0x1f);
     }
 
-  rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data));
+  rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data));
   emit_move_insn (round_reg, round_data_rtx);
 
+  if (vmode != vimode)
+    {
+      target = lowpart_subreg (vimode, target, vmode);
+      op0 = lowpart_subreg (vimode, op0, vmode);
+      op1 = lowpart_subreg (vimode, op1, vmode);
+    }
+
   switch (vmode)
     {
     case E_V16QImode:
@@ -8626,17 +8634,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
       emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
       break;
     case E_V2DFmode:
-      emit_insn (gen_andv2di3 (sel, sel, round_reg));
-      emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
-      break;
     case E_V2DImode:
       emit_insn (gen_andv2di3 (sel, sel, round_reg));
       emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
       break;
     case E_V4SFmode:
-      emit_insn (gen_andv4si3 (sel, sel, round_reg));
-      emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
-      break;
     case E_V4SImode:
       emit_insn (gen_andv4si3 (sel, sel, round_reg));
       emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 8ea41c85b01..5e8d8d74b43 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -837,7 +837,7 @@
  [(match_operand:LSX 0 "register_operand")
   (match_operand:LSX 1 "register_operand")
   (match_operand:LSX 2 "register_operand")
-  (match_operand:LSX 3 "register_operand")]
+  (match_operand:<VIMODE> 3 "register_operand")]
   "ISA_HAS_LSX"
 {
   loongarch_expand_vec_perm (operands[0], operands[1],
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
new file mode 100644
index 00000000000..7acc2113afe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mlasx -O3" } */
+/* { dg-final { scan-assembler "vshuf\.w" } } */
+
+#define V __attribute__ ((vector_size (16)))
+
+int a V;
+float b V;
+float c V;
+float d V;
+
+void
+test (void)
+{
+  d = __builtin_shuffle (b, c, a);
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-22  9:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-22  9:09 [gcc r14-5750] LoongArch: Optimize LSX vector shuffle on floating-point vector Xi Ruoyao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).