public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Kito Cheng <kito.cheng@gmail.com>
To: Jiawei <jiawei@iscas.ac.cn>
Cc: gcc-patches@gcc.gnu.org, kito.cheng@sifive.com,
	palmer@dabbelt.com,  christoph.muellner@vrull.eu,
	jeremy.bennett@embecosm.com,  mary.bennett@embecosm.com,
	nandni.jamnadas@embecosm.com,  charlie.keaney@embecosm.com,
	simon.cook@embecosm.com, tariq.kurd@codasip.com,
	 ibrahim.abu.kharmeh1@huawei.com, sinan.lin@linux.alibaba.com,
	 wuwei2016@iscas.ac.cn, shihua@iscas.ac.cn,
	shiyulong@iscas.ac.cn,  chenyixuan@iscas.ac.cn
Subject: Re: [PATCH 4/5] RISC-V: Add Zcmp extension supports.
Date: Thu, 4 May 2023 17:03:50 +0800	[thread overview]
Message-ID: <CA+yXCZD_BNz-OqqYvZ=eC70TE0d-nf4440B+SrfNMKc_iPZwZQ@mail.gmail.com> (raw)
In-Reply-To: <20230406062118.47431-5-jiawei@iscas.ac.cn>

Could you rebase this patch, we have some changes on

> All "zcmpe" means Zcmp with RVE extension.

Use zcmp_rve instead, zcmpe seems like a new ext. name

> diff --git a/gcc/config/riscv/riscv-zcmp-popret.cc b/gcc/config/riscv/riscv-zcmp-popret.cc
> new file mode 100644
> index 00000000000..d7b40f6a3e2
> --- /dev/null
> +++ b/gcc/config/riscv/riscv-zcmp-popret.cc
> @@ -0,0 +1,260 @@

Need a header here like "^#$% for RISC-V Copyright (C) 2023 Free
Software Foundation, Inc." here

> +#include "config.h"
...
> +#include "cfgrtl.h"
> +
> +#define IN_TARGET_CODE 1

This should appear before include anything.

> +
> +namespace {
> +
> +/*
> +  1. preprocessing:
> +    1.1. if there is no push rtx, then just return. e.g.
> +    (note 5 1 22 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +    (insn/f 22 5 23 2 (set (reg/f:SI 2 sp)
> +      (plus:SI (reg/f:SI 2 sp)
> +       (const_int -32 [0xffffffffffffffe0])))
> +    (nil))
> +    (note 23 22 2 2 NOTE_INSN_PROLOGUE_END)
> +    1.2. if push rtx exists, then we compute the number of
> +    pushed s-registers, n_sreg.
> +
> +  push rtx should be find before NOTE_INSN_PROLOGUE_END tag
> +
> +  [2 and 3 happend simultaneously]
> +  2. find valid move pattern, mv sN, aN, where N < n_sreg,
> +    and aN is not used the move pattern, and sN is not
> +    defined before the move pattern (from prologue to the
> +    position of move pattern).
> +  3. analysis use and reach of every instruction from prologue
> +    to the position of move pattern.
> +    if any sN is used, then we mark the corresponding argument list
> +    candidate as invalid.
> +    e.g.
> +       push  {ra,s0-s3}, {}, -32
> +       sw      s0,44(sp) # s0 is used, then argument list is invalid
> +       mv      a0,a5     # a0 is defined, then argument list is invalid
> +       ...
> +       mv      s0,a0
> +       mv      s1,a1
> +       mv      s2,a2
> +
> +  4. if there is a valid argument list, then replace the pop
> +    push parallel insn, and delete mv pattern.
> +     if not, skip.
> +*/

I am not sure I understand this optimization pass correctly,
could you give more example or indicate which testcase can demonstrate
this pass?

And I would prefer this pass split from this patch, let it become a separated
patch including testcase.


> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5f8cbfc15ed..17df2f3f8cf 100644
> +/* Order for the CLOBBERs/USEs of push/pop.  */
> +static const unsigned push_save_reg_order[] = {

push_save_reg_order -> zcmp_push_save_reg_order

> +  INVALID_REGNUM, RETURN_ADDR_REGNUM, S0_REGNUM,
> +  S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
> +  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM,
> +  S9_REGNUM, S10_REGNUM, S11_REGNUM
> +};
> +
> +/* Order for the CLOBBERs/USEs of push/pop in rve.  */
> +static const unsigned push_save_reg_order_zcmpe[] = {

push_save_reg_order_zcmpe -> zcmp_rve_push_save_reg_order

> @@ -4777,6 +4881,66 @@ riscv_use_save_libcall (const struct riscv_frame_info *frame)
>    return frame->save_libcall_adjustment != 0;
>  }
>
> +/* Determine how many instructions related to push/pop instructions.  */
> +
> +static unsigned
> +riscv_save_push_pop_count (unsigned mask)
> +{
> +  if (!BITSET_P (mask, GP_REG_FIRST + RETURN_ADDR_REGNUM))
> +    return 0;
> +  for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
> +    if (BITSET_P (mask, n)
> +       && !call_used_regs [n])
> +      /* add ra saving and sp adjust. */
> +      return CALLEE_SAVED_REG_NUMBER (n) + 1 + 2;

What the magic number of `+ 1 + 2`?

> +  abort ();
> +}
> +
> +/* Calculate the maximum sp adjustment of push/pop instruction. */
> +
> +static unsigned
> +riscv_push_pop_base_sp_adjust (unsigned mask)
> +{
> +  unsigned n_regs = riscv_save_push_pop_count (mask) - 1;
> +  return (n_regs * UNITS_PER_WORD + 15) & (~0xf);

Use ROUND_UP

> @@ -5171,6 +5337,86 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
>        }
>  }
>
> +static void
> +riscv_emit_pop_insn (struct riscv_frame_info *frame, HOST_WIDE_INT offset, HOST_WIDE_INT size)
> +{
> +  unsigned int veclen = riscv_save_push_pop_count (frame->mask);
> +  unsigned int n_reg = veclen - 1;
> +  rtvec vec = rtvec_alloc (veclen);
> +  HOST_WIDE_INT sp_adjust;
> +  rtx dwarf = NULL_RTX;
> +
> +  const unsigned *reg_order = (TARGET_ZCMP && TARGET_RVE)
> +       ? push_save_reg_order_zcmpe
> +       : push_save_reg_order;
> +
> +  gcc_assert (n_reg >= 1
> +       && TARGET_ZCMP
> +       && ((TARGET_RVE && (n_reg <= ARRAY_SIZE (push_save_reg_order_zcmpe)))
> +           || (TARGET_ZCMP && (n_reg <= ARRAY_SIZE (push_save_reg_order)))));
> +
> +  /* sp adjust pattern */
> +  int max_allow_sp_adjust = riscv_push_pop_base_sp_adjust (frame->mask) + 48;
> +  int aligned_size = size;
> +
> +  /* if sp adjustment is too large, we should split it first. */
> +  if (aligned_size > max_allow_sp_adjust)
> +    {
> +      rtx dwarf_pre_sp_adjust = NULL_RTX;
> +      rtx pre_adjust_rtx = gen_add3_insn (stack_pointer_rtx,
> +                       stack_pointer_rtx,
> +                       GEN_INT (aligned_size - max_allow_sp_adjust));
> +      rtx insn = emit_insn (pre_adjust_rtx);
> +
> +      rtx cfa_pre_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> +                       GEN_INT (aligned_size - max_allow_sp_adjust));
> +      dwarf_pre_sp_adjust = alloc_reg_note (REG_CFA_DEF_CFA,
> +               cfa_pre_adjust_rtx,
> +               dwarf_pre_sp_adjust);
> +
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +      REG_NOTES (insn) = dwarf_pre_sp_adjust;
> +
> +      sp_adjust = max_allow_sp_adjust;
> +    }
> +  else
> +    sp_adjust = (aligned_size + 15) & (~0xf);

Use ROUND_UP

> @@ -5270,6 +5516,146 @@ riscv_emit_stack_tie (void)
>      emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
>  }
>
> +bool
> +riscv_check_regno(rtx pat, unsigned regno)
> +{
> +  return REG_P (pat)
> +      && REGNO (pat) == regno;
> +}
> +
> +/* Function to check whether the OP is a valid stack push/pop operation.
> +   This part is borrowed from nds32 nds32_valid_stack_push_pop_p */
> +
> +bool
> +riscv_valid_stack_push_pop_p (rtx op, bool push_p)
> +{
> +  int index;
> +  int total_count;
> +  int sp_adjust_rtx_index;
> +  rtx elt;
> +  rtx elt_reg;
> +  rtx elt_plus;
> +
> +  if (!TARGET_ZCMP)
> +    return false;
> +
> +  total_count = XVECLEN (op, 0);
> +  sp_adjust_rtx_index = push_p ? 0 : total_count - 1;
> +
> +  /* At least sp + one callee save/restore register rtx */
> +  if (total_count < 2)
> +    return false;
> +
> +  /* Perform some quick check for that every element should be 'set',
> +     for pop, it might contain `ret` and `ret value` pattern.  */
> +  for (index = 0; index < total_count; index++)
> +    {
> +      elt = XVECEXP (op, 0, index);
> +
> +      /* skip pop return value rtx */
> +      if (!push_p && GET_CODE (elt) == SET
> +         && riscv_check_regno (SET_DEST (elt), RETURN_VALUE_REGNUM)
> +         && total_count >= 4
> +         && index + 1 < total_count
> +         && GET_CODE (XVECEXP (op, 0, index + 1)) == USE)
> +       {
> +         rtx use_reg = XEXP (XVECEXP (op, 0, index + 1), 0);
> +
> +         if (!riscv_check_regno (use_reg, RETURN_VALUE_REGNUM))
> +           return false;
> +
> +         index += 1;
> +         continue;
> +       }
> +
> +      /* skip ret rtx */
> +      if (!push_p && GET_CODE (elt) == SIMPLE_RETURN
> +         && total_count >= 4
> +         && index + 1 < total_count
> +         && GET_CODE (XVECEXP (op, 0, index + 1)) == USE)
> +       {
> +         rtx use_reg = XEXP (XVECEXP (op, 0, index + 1), 0);
> +
> +         if (!riscv_check_regno (use_reg, RETURN_ADDR_REGNUM))
> +           return false;
> +
> +         index += 1;
> +         sp_adjust_rtx_index -= 2;
> +         continue;
> +       }
> +
> +      if (GET_CODE (elt) != SET)
> +       return false;
> +    }
> +
> +  elt = XVECEXP (op, 0, sp_adjust_rtx_index);
> +  elt_reg  = SET_DEST (elt);
> +  elt_plus = SET_SRC (elt);
> +
> +  /* Check this is (set (stack_reg) (plus stack_reg const)) pattern.  */
> +  if (GET_CODE (elt_plus) != PLUS
> +      || !riscv_check_regno (elt_reg, STACK_POINTER_REGNUM))
> +    return false;
> +
> +  /* Pass all test, this is a valid rtx.  */
> +  return true;
> +}
> +
> +/* Generate push/pop rtx */
> +
> +static void
> +riscv_emit_push_insn (struct riscv_frame_info *frame, HOST_WIDE_INT size)
> +{
> +  unsigned int veclen = riscv_save_push_pop_count (frame->mask);
> +  unsigned int n_reg = veclen - 1;

Need comment to explain why `- 1` here.

> +  rtvec vec = rtvec_alloc (veclen);
> +
> +  const unsigned *reg_order = (TARGET_ZCMP && TARGET_RVE)
> +       ? push_save_reg_order_zcmpe
> +       : push_save_reg_order;
> +
> +  int aligned_size = (size + 15) & (~0xf);

Use ROUND_UP

> +
> +  gcc_assert (n_reg >= 1
> +       && TARGET_ZCMP
> +       && ((TARGET_RVE && (n_reg <= ARRAY_SIZE (push_save_reg_order_zcmpe)))
> +           || (TARGET_ZCMP && (n_reg <= ARRAY_SIZE (push_save_reg_order)))));
> +
> +  /* sp adjust pattern */
> +  int max_allow_sp_adjust = riscv_push_pop_base_sp_adjust (frame->mask) + 48;

What's the magic number of 48?

> +  int sp_adjust = aligned_size > max_allow_sp_adjust ?
> +      max_allow_sp_adjust
> +      : aligned_size;
> +
> +  /*TODO: move this part to frame computation function. */

Is it possible to resolve this TODO?

> +  frame->gp_sp_offset = (veclen - 1) * UNITS_PER_WORD;
> +  frame->push_pop_sp_adjust = sp_adjust;

> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index d05b1d59853..6e6e3ee2c25 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -383,6 +383,7 @@ ASM_MISA_SPEC
>  #define HARD_FRAME_POINTER_REGNUM 8
>  #define STACK_POINTER_REGNUM 2
>  #define THREAD_POINTER_REGNUM 4
> +#define RETURN_VALUE_REGNUM 10
>
>  /* These two registers don't really exist: they get eliminated to either
>     the stack or hard frame pointer.  */
> @@ -1097,4 +1098,7 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
>  #define DWARF_REG_TO_UNWIND_COLUMN(REGNO) \
>    ((REGNO == RISCV_DWARF_VLENB) ? (FIRST_PSEUDO_REGISTER + 1) : REGNO)
>
> +#define RISCV_ZCE_PUSH_POP_MASK 0x0ffc0302u

RISCV_ZCE_PUSH_POP_MASK -> RISCV_ZCMP_PUSH_POP_MASK


> +#define RISCV_ZCMPE_PUSH_POP_MASK 0x302u

RISCV_ZCMPE_PUSH_POP_MASK -> RISCV_ZCMP_RVE_PUSH_POP_MASK

> diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
> index 6e326fc7e02..9ef522306a5 100644
> --- a/gcc/config/riscv/t-riscv
> +++ b/gcc/config/riscv/t-riscv
> @@ -90,6 +90,10 @@ riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc \
>         $(COMPILE) $<
>         $(POSTCOMPILE)
>
> +riscv-zcmp-popret.o: $(srcdir)/config/riscv/riscv-zcmp-popret.cc

Plz add right dependency here.

> diff --git a/gcc/config/riscv/zc.md b/gcc/config/riscv/zc.md
> new file mode 100644
> index 00000000000..3ad34dacd49
> --- /dev/null
> +++ b/gcc/config/riscv/zc.md
> @@ -0,0 +1,47 @@
> +;; Machine description for ZCE extension.

ZCE extension. -> Zc* extension

  reply	other threads:[~2023-05-04  9:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-06  6:21 [PATCH 0/5] RISC-V: Support ZC* extensions Jiawei
2023-04-06  6:21 ` [PATCH 1/5] RISC-V: Minimal support for ZC extensions Jiawei
2023-05-04  8:33   ` Kito Cheng
2023-04-06  6:21 ` [PATCH 2/5] RISC-V: Enable compressible features when use ZC* extensions Jiawei
2023-04-06  6:21 ` [PATCH 3/5] RISC-V: Add ZC* test for march args being passed Jiawei
2023-05-04  8:37   ` Kito Cheng
2023-04-06  6:21 ` [PATCH 4/5] RISC-V: Add Zcmp extension supports Jiawei
2023-05-04  9:03   ` Kito Cheng [this message]
     [not found]   ` <07720619-dd69-4816-987e-ff0e14d9a348.>
2023-05-12  8:53     ` Sinan
2023-04-06  6:21 ` [PATCH 5/5] RISC-V: Add ZCMP push/pop testcases Jiawei
     [not found] <2023042517370879865929@eswincomputing.com>
2023-04-25  9:52 ` [PATCH 4/5] RISC-V: Add Zcmp extension supports Fei Gao
2023-04-25 10:11 Fei Gao
2023-05-05 15:57 ` Sinan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+yXCZD_BNz-OqqYvZ=eC70TE0d-nf4440B+SrfNMKc_iPZwZQ@mail.gmail.com' \
    --to=kito.cheng@gmail.com \
    --cc=charlie.keaney@embecosm.com \
    --cc=chenyixuan@iscas.ac.cn \
    --cc=christoph.muellner@vrull.eu \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ibrahim.abu.kharmeh1@huawei.com \
    --cc=jeremy.bennett@embecosm.com \
    --cc=jiawei@iscas.ac.cn \
    --cc=kito.cheng@sifive.com \
    --cc=mary.bennett@embecosm.com \
    --cc=nandni.jamnadas@embecosm.com \
    --cc=palmer@dabbelt.com \
    --cc=shihua@iscas.ac.cn \
    --cc=shiyulong@iscas.ac.cn \
    --cc=simon.cook@embecosm.com \
    --cc=sinan.lin@linux.alibaba.com \
    --cc=tariq.kurd@codasip.com \
    --cc=wuwei2016@iscas.ac.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).