public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jiufu Guo <guojiufu@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org, dje.gcc@gmail.com, linkw@gcc.gnu.org,
	rguenther@suse.de, jeffreyalaw@gmail.com
Subject: Re: [PATCH V2] Use subscalar mode to move struct block for parameter
Date: Mon, 21 Nov 2022 11:07:05 +0800	[thread overview]
Message-ID: <7ea64lroo6.fsf@pike.rch.stglabs.ibm.com> (raw)
In-Reply-To: <20221117061549.178481-1-guojiufu@linux.ibm.com> (Jiufu Guo's message of "Thu, 17 Nov 2022 14:15:49 +0800")

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> As mentioned in the previous version of the patch:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604646.html
> Suboptimal code is generated for "assigning from parameter" or
> "assigning to return value".
> This patch improves assignments from parameters, as in the cases
> below:
> /////case1.c
> typedef struct SA {double a[3];long l; } A;
> A ret_arg (A a) {return a;}
> void st_arg (A a, A *p) {*p = a;}
>
> ////case2.c
> typedef struct SA {double a[3];} A;
> A ret_arg (A a) {return a;}
> void st_arg (A a, A *p) {*p = a;}
>
> For this patch, bootstrap and regtest pass on ppc64{,le}
> and x86_64.
> * Besides asking for help reviewing this patch, I would also like to
> ask for comments about enhancing the "assigning to returns" case.

I updated the patch to fix the issue for returns.  This patch
adds a flag DECL_USEDBY_RETURN_P to indicate whether a var is used
by a return stmt.  This patch fixes the issue in the expand pass only,
so we will try to update the patch to avoid this flag.

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index dd29ffffc03..09b8ec64cea 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2158,6 +2158,20 @@ expand_used_vars (bitmap forced_stack_vars)
     frame_phase = off ? align - off : 0;
   }
 
+  /* Collect VARs on returns.  */
+  if (DECL_RESULT (current_function_decl))
+    {
+      edge_iterator ei;
+      edge e;
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+	if (greturn *ret = safe_dyn_cast<greturn *> (last_stmt (e->src)))
+	  {
+	    tree val = gimple_return_retval (ret);
+	    if (val && VAR_P (val))
+	      DECL_USEDBY_RETURN_P (val) = 1;
+	  }
+    }
+
   /* Set TREE_USED on all variables in the local_decls.  */
   FOR_EACH_LOCAL_DECL (cfun, i, var)
     TREE_USED (var) = 1;
diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..20973649963 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -6045,6 +6045,52 @@ expand_assignment (tree to, tree from, bool nontemporal)
       return;
     }
 
+  if ((TREE_CODE (from) == PARM_DECL && DECL_INCOMING_RTL (from)
+       && TYPE_MODE (TREE_TYPE (from)) == BLKmode
+       && (GET_CODE (DECL_INCOMING_RTL (from)) == PARALLEL
+	   || REG_P (DECL_INCOMING_RTL (from))))
+      || (VAR_P (to) && DECL_USEDBY_RETURN_P (to)
+	  && TYPE_MODE (TREE_TYPE (to)) == BLKmode
+	  && GET_CODE (DECL_RTL (DECL_RESULT (current_function_decl)))
+	       == PARALLEL))
+    {
+      push_temp_slots ();
+      rtx par_ret;
+      machine_mode mode;
+      par_ret = TREE_CODE (from) == PARM_DECL
+		  ? DECL_INCOMING_RTL (from)
+		  : DECL_RTL (DECL_RESULT (current_function_decl));
+      mode = GET_CODE (par_ret) == PARALLEL
+	       ? GET_MODE (XEXP (XVECEXP (par_ret, 0, 0), 0))
+	       : word_mode;
+      int mode_size = GET_MODE_SIZE (mode).to_constant ();
+      int size = INTVAL (expr_size (from));
+
+      /* Whether and how the parameter uses a submode depends on the size
+	 and position of the parameter.  Here a heuristic number is used.  */
+      int hurstc_num = 8;
+      if (size < mode_size || (size % mode_size) != 0
+	  || size > (mode_size * hurstc_num))
+	result = store_expr (from, to_rtx, 0, nontemporal, false);
+      else
+	{
+	  rtx from_rtx
+	    = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
+	  for (int i = 0; i < size / mode_size; i++)
+	    {
+	      rtx temp = gen_reg_rtx (mode);
+	      rtx src = adjust_address (from_rtx, mode, mode_size * i);
+	      rtx dest = adjust_address (to_rtx, mode, mode_size * i);
+	      emit_move_insn (temp, src);
+	      emit_move_insn (dest, temp);
+	    }
+	  result = to_rtx;
+	}
+      preserve_temp_slots (result);
+      pop_temp_slots ();
+      return;
+    }
+
   /* Compute FROM and store the value in the rtx we got.  */
 
   push_temp_slots ();
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index af75522504f..be42e1464de 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1808,7 +1808,8 @@ struct GTY(()) tree_decl_common {
      In VAR_DECL, PARM_DECL and RESULT_DECL, this is
      DECL_HAS_VALUE_EXPR_P.  */
   unsigned decl_flag_2 : 1;
-  /* In FIELD_DECL, this is DECL_PADDING_P.  */
+  /* In FIELD_DECL, this is DECL_PADDING_P.
+     In VAR_DECL, this is DECL_USEDBY_RETURN_P.  */
   unsigned decl_flag_3 : 1;
   /* Logically, these two would go in a theoretical base shared by var and
      parm decl. */
diff --git a/gcc/tree.h b/gcc/tree.h
index a863d2e50e5..73c0314dac1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3011,6 +3011,10 @@ extern void decl_value_expr_insert (tree, tree);
 #define DECL_PADDING_P(NODE) \
   (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_3)
 
+/* Used in a VAR_DECL to indicate that it is used by a return stmt.  */
+#define DECL_USEDBY_RETURN_P(NODE) \
+  (VAR_DECL_CHECK (NODE)->decl_common.decl_flag_3)
+
 /* Used in a FIELD_DECL to indicate whether this field is not a flexible
    array member. This is only valid for the last array type field of a
    structure.  */

>
> On some targets (ppc64), for the case below:
> ////case3.c
> typedef struct SA {double a[3]; long l; } A;
> A ret_arg_pt (A *a) {return *a;}
>
> The optimized GIMPLE code looks like:
>   <retval> = *a_2(D);
>   return <retval>;
> Here, <retval>(aka. RESULT_DECL) is MEM, and "aggregate_value_p"
> returns true for <retval>.
>
> * For the case below, however, the generated code is still suboptimal.
> ////case4.c
> typedef struct SA {double a[3];} A;
> A ret_arg_pt (A *a) {return *a;}
>
> The optimized GIMPLE code looks like:
>   D.3951 = *a_2(D);
>   return D.3951;
> The "return/assign" stmts are using D.3951(VAR_DECL) instead of
> "<retval>(RESULT_DECL)".  The mode of D.3951/<retval> is BLK.
> The RTL of D.3951 is MEM, and the RTL of <retval> is PARALLEL.  For
> PARALLEL, aggregate_value_p returns false.
>
> In function expand_assignment, there is code:
>   if (TREE_CODE (to) == RESULT_DECL
>       && (REG_P (to_rtx) || GET_CODE (to_rtx) == PARALLEL))
> This code can handle "<retval>", but cannot handle "D.3951".
>
> I think one way to handle this issue is to update the
> GIMPLE sequence to: "<retval> = *a_2(D); return <retval>;"
> Another way is to collect the VARs that are used by return stmts
> and, for assignments to those VARs, use a sub-scalar mode for the
> block move.
>
> Thanks for any comments and suggestions!
>
>
> BR,
> Jeff (Jiufu)
>
> ---
>  gcc/expr.cc | 40 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 40 insertions(+)
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d9407432ea5..420f9cf3662 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -6045,6 +6045,46 @@ expand_assignment (tree to, tree from, bool nontemporal)
>        return;
>      }
>  
> +  if (TREE_CODE (from) == PARM_DECL && DECL_INCOMING_RTL (from)
> +      && TYPE_MODE (TREE_TYPE (from)) == BLKmode
> +      && (GET_CODE (DECL_INCOMING_RTL (from)) == PARALLEL
> +	  || REG_P (DECL_INCOMING_RTL (from))))
> +    {
> +      rtx parm = DECL_INCOMING_RTL (from);
> +
> +      push_temp_slots ();
> +      machine_mode mode;
> +      mode = GET_CODE (parm) == PARALLEL
> +	       ? GET_MODE (XEXP (XVECEXP (parm, 0, 0), 0))
> +	       : word_mode;
> +      int mode_size = GET_MODE_SIZE (mode).to_constant ();
> +      int size = INTVAL (expr_size (from));
> +
> +      /* Whether and how the parameter uses a submode depends on the size
> +	 and position of the parameter.  Here a heuristic number is used.  */
> +      int hurstc_num = 8;
> +      if (size < mode_size || (size % mode_size) != 0
> +	  || size > (mode_size * hurstc_num))
> +	result = store_expr (from, to_rtx, 0, nontemporal, false);
> +      else
> +	{
> +	  rtx from_rtx
> +	    = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
> +	  for (int i = 0; i < size / mode_size; i++)
> +	    {
> +	      rtx temp = gen_reg_rtx (mode);
> +	      rtx src = adjust_address (from_rtx, mode, mode_size * i);
> +	      rtx dest = adjust_address (to_rtx, mode, mode_size * i);
> +	      emit_move_insn (temp, src);
> +	      emit_move_insn (dest, temp);
> +	    }
> +	  result = to_rtx;
> +	}
> +      preserve_temp_slots (result);
> +      pop_temp_slots ();
> +      return;
> +    }
> +
>    /* Compute FROM and store the value in the rtx we got.  */
>  
>    push_temp_slots ();

  reply	other threads:[~2022-11-21  3:07 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-17  6:15 Jiufu Guo
2022-11-21  3:07 ` Jiufu Guo [this message]
2022-11-22 21:57   ` Jeff Law
2022-11-23  2:58     ` Jiufu Guo
2022-11-24  7:31       ` Richard Biener
2022-11-25  5:05         ` Jiufu Guo
2022-11-25 12:29           ` Jiufu Guo
2022-11-28 17:00       ` Jeff Law
2022-11-29  3:53         ` Jiufu Guo
2022-12-05 16:48           ` Jeff Law
2022-12-06  2:36             ` Jiufu Guo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ea64lroo6.fsf@pike.rch.stglabs.ibm.com \
    --to=guojiufu@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=linkw@gcc.gnu.org \
    --cc=rguenther@suse.de \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).