From: Jiufu Guo <guojiufu@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org, dje.gcc@gmail.com, linkw@gcc.gnu.org,
guojiufu@linux.ibm.com, rguenther@suse.de, jeffreyalaw@gmail.com
Subject: [PATCH V2] Update block move for struct param or returns
Date: Thu, 24 Nov 2022 17:41:48 +0800 [thread overview]
Message-ID: <20221124094148.125303-1-guojiufu@linux.ibm.com> (raw)
Hi,
When assigning a parameter to a variable, or assigning a variable to
return value with struct type, "block move" are used to expand
the assignment. It would be better to use the register mode according
to the target/ABI to move the blocks. And then this would raise more
opportunities for other optimization passes(cse/dse/xprop).
As the example code (like code in PR65421):
typedef struct SA {double a[3];} A;
A ret_arg_pt (A *a){return *a;} // on ppc64le, only 3 lfd(s)
A ret_arg (A a) {return a;} // just empty fun body
void st_arg (A a, A *p) {*p = a;} //only 3 stfd(s)
This patch is based on the previous version which supports assignments
from parameter:
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605709.html
This patch also supports returns.
I also tried to update gimplify/nrv to replace "return D.xxx;" with
"return <retval>;". While there is one issue: "<retval>" with
PARALLEL code can not be accessed through address/component_ref.
This issue blocks a few passes (e.g. sra, expand).
On ppc64, some dead stores are not eliminated. e.g. for ret_arg:
.cfi_startproc
std 4,56(1)//reductant
std 5,64(1)//reductant
std 6,72(1)//reductant
std 4,0(3)
std 5,8(3)
std 6,16(3)
blr
Bootstraped and regtested on ppc64le and x86_64.
I'm wondering if this patch could be committed first.
Thanks for the comments and suggestions.
BR,
Jeff (Jiufu)
PR target/65421
gcc/ChangeLog:
* cfgexpand.cc (expand_used_vars): Add collecting return VARs.
(expand_gimple_stmt_1): Call expand_special_struct_assignment.
(pass_expand::execute): Free collections of return VARs.
* expr.cc (expand_special_struct_assignment): New function.
* expr.h (expand_special_struct_assignment): Declare.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr65421-1.c: New test.
* gcc.target/powerpc/pr65421.c: New test.
---
gcc/cfgexpand.cc | 37 +++++++++++++++++
gcc/expr.cc | 43 ++++++++++++++++++++
gcc/expr.h | 3 ++
gcc/testsuite/gcc.target/powerpc/pr65421-1.c | 21 ++++++++++
gcc/testsuite/gcc.target/powerpc/pr65421.c | 19 +++++++++
5 files changed, 123 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index dd29ffffc03..f185de39341 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -341,6 +341,9 @@ static hash_map<tree, size_t> *decl_to_stack_part;
all of them in one big sweep. */
static bitmap_obstack stack_var_bitmap_obstack;
+/* Those VARs on returns. */
+static bitmap return_vars;
+
/* An array of indices such that stack_vars[stack_vars_sorted[i]].size
is non-decreasing. */
static size_t *stack_vars_sorted;
@@ -2158,6 +2161,24 @@ expand_used_vars (bitmap forced_stack_vars)
frame_phase = off ? align - off : 0;
}
+ /* Collect VARs on returns. */
+ return_vars = NULL;
+ if (DECL_RESULT (current_function_decl)
+ && TYPE_MODE (TREE_TYPE (DECL_RESULT (current_function_decl))) == BLKmode)
+ {
+ return_vars = BITMAP_ALLOC (NULL);
+
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+ if (greturn *ret = safe_dyn_cast<greturn *> (last_stmt (e->src)))
+ {
+ tree val = gimple_return_retval (ret);
+ if (val && VAR_P (val))
+ bitmap_set_bit (return_vars, DECL_UID (val));
+ }
+ }
+
/* Set TREE_USED on all variables in the local_decls. */
FOR_EACH_LOCAL_DECL (cfun, i, var)
TREE_USED (var) = 1;
@@ -3942,6 +3963,17 @@ expand_gimple_stmt_1 (gimple *stmt)
/* This is a clobber to mark the going out of scope for
this LHS. */
expand_clobber (lhs);
+ else if ((TREE_CODE (rhs) == PARM_DECL && DECL_INCOMING_RTL (rhs)
+ && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode
+ && (GET_CODE (DECL_INCOMING_RTL (rhs)) == PARALLEL
+ || REG_P (DECL_INCOMING_RTL (rhs))))
+ || (VAR_P (lhs) && return_vars
+ && DECL_RTL_SET_P (DECL_RESULT (current_function_decl))
+ && GET_CODE (
+ DECL_RTL (DECL_RESULT (current_function_decl)))
+ == PARALLEL
+ && bitmap_bit_p (return_vars, DECL_UID (lhs))))
+ expand_special_struct_assignment (lhs, rhs);
else
expand_assignment (lhs, rhs,
gimple_assign_nontemporal_move_p (
@@ -7025,6 +7057,11 @@ pass_expand::execute (function *fun)
/* After expanding, the return labels are no longer needed. */
return_label = NULL;
naked_return_label = NULL;
+ if (return_vars)
+ {
+ BITMAP_FREE (return_vars);
+ return_vars = NULL;
+ }
/* After expanding, the tm_restart map is no longer needed. */
if (fun->gimple_df->tm_restart)
diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..6ffd9439188 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -5559,6 +5559,49 @@ mem_ref_refers_to_non_mem_p (tree ref)
return non_mem_decl_p (base);
}
+/* Expand the assignment from parameter or to returns if it needs
+ "block move" on struct type. */
+
+void
+expand_special_struct_assignment (tree to, tree from)
+{
+ rtx result;
+
+ push_temp_slots ();
+ rtx par_ret = TREE_CODE (from) == PARM_DECL
+ ? DECL_INCOMING_RTL (from)
+ : DECL_RTL (DECL_RESULT (current_function_decl));
+ machine_mode mode = GET_CODE (par_ret) == PARALLEL
+ ? GET_MODE (XEXP (XVECEXP (par_ret, 0, 0), 0))
+ : word_mode;
+ int mode_size = GET_MODE_SIZE (mode).to_constant ();
+ int size = INTVAL (expr_size (from));
+ rtx to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+ /* Here using a heurisitic number for how many words may pass via gprs. */
+ int hurstc_num = 8;
+ if (size < mode_size || (size % mode_size) != 0
+ || (GET_CODE (par_ret) != PARALLEL && size > (mode_size * hurstc_num)))
+ result = store_expr (from, to_rtx, 0, false, false);
+ else
+ {
+ rtx from_rtx
+ = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
+ for (int i = 0; i < size / mode_size; i++)
+ {
+ rtx temp = gen_reg_rtx (mode);
+ rtx src = adjust_address (from_rtx, mode, mode_size * i);
+ rtx dest = adjust_address (to_rtx, mode, mode_size * i);
+ emit_move_insn (temp, src);
+ emit_move_insn (dest, temp);
+ }
+ result = to_rtx;
+ }
+
+ preserve_temp_slots (result);
+ pop_temp_slots ();
+}
+
/* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL
is true, try generating a nontemporal store. */
diff --git a/gcc/expr.h b/gcc/expr.h
index 08b59b8d869..10527f23a56 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -281,6 +281,9 @@ extern void get_bit_range (poly_uint64_pod *, poly_uint64_pod *, tree,
/* Expand an assignment that stores the value of FROM into TO. */
extern void expand_assignment (tree, tree, bool);
+/* Expand an assignment from parameters or to returns. */
+extern void expand_special_struct_assignment (tree, tree);
+
/* Generate code for computing expression EXP,
and storing the value into TARGET.
If SUGGEST_REG is nonzero, copy the value through a register
diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421-1.c b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c
new file mode 100644
index 00000000000..f55a0fe0002
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c
@@ -0,0 +1,21 @@
+/* PR target/65421 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -m64" } */
+
+typedef struct SA
+{
+ double a[3];
+ long l;
+} A;
+
+A ret_arg_pt (A *a){return *a;}
+
+A ret_arg (A a) {return a;}
+
+void st_arg (A a, A *p) {*p = a;}
+
+/* { dg-final { scan-assembler-times {\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstd\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c
new file mode 100644
index 00000000000..26e85468470
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c
@@ -0,0 +1,19 @@
+/* PR target/65421 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -m64" } */
+
+typedef struct SA
+{
+ double a[3];
+} A;
+
+A ret_arg_pt (A *a){return *a;}
+
+A ret_arg (A a) {return a;}
+
+void st_arg (A a, A *p) {*p = a;}
+
+/* { dg-final { scan-assembler-times {\mlfd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstfd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */
--
2.17.1
next reply other threads:[~2022-11-24 9:41 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-24 9:41 Jiufu Guo [this message]
2022-11-25 5:12 ` Jiufu Guo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221124094148.125303-1-guojiufu@linux.ibm.com \
--to=guojiufu@linux.ibm.com \
--cc=dje.gcc@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=linkw@gcc.gnu.org \
--cc=rguenther@suse.de \
--cc=segher@kernel.crashing.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).