* [PATCH] RISC-V: Enable overlap-by-pieces via tune param
@ 2021-07-21 23:32 Christoph Muellner
2021-07-22 8:52 ` Kito Cheng
0 siblings, 1 reply; 6+ messages in thread
From: Christoph Muellner @ 2021-07-21 23:32 UTC (permalink / raw)
To: gcc-patches; +Cc: Jim Wilson, Kito Cheng, Christoph Muellner
This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param, which makes it possible to enable the
overlap_op_by_pieces feature of the by-pieces infrastructure.
gcc/ChangeLog:
* config/riscv/riscv.c (struct riscv_tune_param): New field.
(riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.
Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
---
gcc/config/riscv/riscv.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..824e930ef05 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -220,6 +220,7 @@ struct riscv_tune_param
unsigned short branch_cost;
unsigned short memory_cost;
bool slow_unaligned_access;
+ bool overlap_op_by_pieces;
};
/* Information about one micro-arch we know about. */
@@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
3, /* branch_cost */
5, /* memory_cost */
true, /* slow_unaligned_access */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
4, /* branch_cost */
3, /* memory_cost */
true, /* slow_unaligned_access */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
3, /* branch_cost */
5, /* memory_cost */
false, /* slow_unaligned_access */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for size. */
@@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
1, /* branch_cost */
2, /* memory_cost */
false, /* slow_unaligned_access */
+ false, /* overlap_op_by_pieces */
};
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
return riscv_slow_unaligned_access_p;
}
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+ return tune_param->overlap_op_by_pieces;
+}
+
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
static bool
@@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
--
2.31.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
2021-07-21 23:32 [PATCH] RISC-V: Enable overlap-by-pieces via tune param Christoph Muellner
@ 2021-07-22 8:52 ` Kito Cheng
2021-07-22 9:20 ` Christoph Müllner
0 siblings, 1 reply; 6+ messages in thread
From: Kito Cheng @ 2021-07-22 8:52 UTC (permalink / raw)
To: Christoph Muellner; +Cc: GCC Patches, Kito Cheng
It's my first time seeing this hook :p Would you mind describing when we
need to set it to true?
I mean, is there some CPU feature that means we can/should set it to true?
On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch adds the field overlap_op_by_pieces to the struct
> riscv_tune_param, which allows to enable the overlap_op_by_pieces
> feature of the by-pieces infrastructure.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.c (struct riscv_tune_param): New field.
> (riscv_overlap_op_by_pieces): New function.
> (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> riscv_overlap_op_by_pieces.
>
> Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> ---
> gcc/config/riscv/riscv.c | 14 ++++++++++++++
> 1 file changed, 14 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 576960bb37c..824e930ef05 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -220,6 +220,7 @@ struct riscv_tune_param
> unsigned short branch_cost;
> unsigned short memory_cost;
> bool slow_unaligned_access;
> + bool overlap_op_by_pieces;
> };
>
> /* Information about one micro-arch we know about. */
> @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> 3, /* branch_cost */
> 5, /* memory_cost */
> true, /* slow_unaligned_access */
> + false, /* overlap_op_by_pieces */
> };
>
> /* Costs to use when optimizing for Sifive 7 Series. */
> @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> 4, /* branch_cost */
> 3, /* memory_cost */
> true, /* slow_unaligned_access */
> + false, /* overlap_op_by_pieces */
> };
>
> /* Costs to use when optimizing for T-HEAD c906. */
> @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> 3, /* branch_cost */
> 5, /* memory_cost */
> false, /* slow_unaligned_access */
> + false, /* overlap_op_by_pieces */
> };
>
> /* Costs to use when optimizing for size. */
> @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> 1, /* branch_cost */
> 2, /* memory_cost */
> false, /* slow_unaligned_access */
> + false, /* overlap_op_by_pieces */
> };
>
> static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> return riscv_slow_unaligned_access_p;
> }
>
> +static bool
> +riscv_overlap_op_by_pieces (void)
> +{
> + return tune_param->overlap_op_by_pieces;
> +}
> +
> /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
>
> static bool
> @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> #undef TARGET_SLOW_UNALIGNED_ACCESS
> #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
>
> +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> +
> #undef TARGET_SECONDARY_MEMORY_NEEDED
> #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
>
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
2021-07-22 8:52 ` Kito Cheng
@ 2021-07-22 9:20 ` Christoph Müllner
2021-07-22 9:28 ` Kito Cheng
0 siblings, 1 reply; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22 9:20 UTC (permalink / raw)
To: Kito Cheng; +Cc: GCC Patches, Kito Cheng
On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>
> It's my first time seeing this hook :p Did you mind describing when we
> need to set it to true?
> I mean when a CPU has some feature then we can/should set it to true?
The by-pieces infrastructure can inline builtins quite well and
uses slow_unaligned_access and overlap_op_by_pieces to tune the
emitted instruction sequence.
If unaligned accesses are fast, overlap_op_by_pieces can reduce
the number of instructions (emitted by by-pieces for e.g. memset) at the cost
of overlapping memory accesses.
E.g. you want to clear 15 bytes with memset (and unaligned accesses are cheap):
Without overlap_op_by_pieces you will get:
8e: 00053023 sd zero,0(a0)
92: 00052423 sw zero,8(a0)
96: 00051623 sh zero,12(a0)
9a: 00050723 sb zero,14(a0)
With overlap_op_by_pieces you will get:
7e: 00053023 sd zero,0(a0)
82: 000533a3 sd zero,7(a0)
BR
Christoph
>
>
> On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > This patch adds the field overlap_op_by_pieces to the struct
> > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > feature of the by-pieces infrastructure.
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > (riscv_overlap_op_by_pieces): New function.
> > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > riscv_overlap_op_by_pieces.
> >
> > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > ---
> > gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > 1 file changed, 14 insertions(+)
> >
> > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > index 576960bb37c..824e930ef05 100644
> > --- a/gcc/config/riscv/riscv.c
> > +++ b/gcc/config/riscv/riscv.c
> > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > unsigned short branch_cost;
> > unsigned short memory_cost;
> > bool slow_unaligned_access;
> > + bool overlap_op_by_pieces;
> > };
> >
> > /* Information about one micro-arch we know about. */
> > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > 3, /* branch_cost */
> > 5, /* memory_cost */
> > true, /* slow_unaligned_access */
> > + false, /* overlap_op_by_pieces */
> > };
> >
> > /* Costs to use when optimizing for Sifive 7 Series. */
> > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > 4, /* branch_cost */
> > 3, /* memory_cost */
> > true, /* slow_unaligned_access */
> > + false, /* overlap_op_by_pieces */
> > };
> >
> > /* Costs to use when optimizing for T-HEAD c906. */
> > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > 3, /* branch_cost */
> > 5, /* memory_cost */
> > false, /* slow_unaligned_access */
> > + false, /* overlap_op_by_pieces */
> > };
> >
> > /* Costs to use when optimizing for size. */
> > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > 1, /* branch_cost */
> > 2, /* memory_cost */
> > false, /* slow_unaligned_access */
> > + false, /* overlap_op_by_pieces */
> > };
> >
> > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > return riscv_slow_unaligned_access_p;
> > }
> >
> > +static bool
> > +riscv_overlap_op_by_pieces (void)
> > +{
> > + return tune_param->overlap_op_by_pieces;
> > +}
> > +
> > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
> >
> > static bool
> > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > #undef TARGET_SLOW_UNALIGNED_ACCESS
> > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> >
> > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > +
> > #undef TARGET_SECONDARY_MEMORY_NEEDED
> > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> >
> > --
> > 2.31.1
> >
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
2021-07-22 9:20 ` Christoph Müllner
@ 2021-07-22 9:28 ` Kito Cheng
2021-07-22 12:27 ` Christoph Müllner
0 siblings, 1 reply; 6+ messages in thread
From: Kito Cheng @ 2021-07-22 9:28 UTC (permalink / raw)
To: Christoph Müllner; +Cc: Kito Cheng, GCC Patches, Jim Wilson
Sounds like we could just use !tune_param->slow_unaligned_access for
TARGET_OVERLAP_OP_BY_PIECES_P, since it improves both performance and
code size if we have cheap unaligned accesses?
On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> >
> > It's my first time seeing this hook :p Did you mind describing when we
> > need to set it to true?
> > I mean when a CPU has some feature then we can/should set it to true?
>
> The by-pieces infrastructure allows to inline builtins quite well and
> uses slow_unaligned_access and overlap_op_by_pieces to tune the
> emitted instruction sequence.
>
> In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> of overlapping memory accesses.
>
> E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> Without overlap_op_by_pieces you will get:
> 8e: 00053023 sd zero,0(a0)
> 92: 00052423 sw zero,8(a0)
> 96: 00051623 sh zero,12(a0)
> 9a: 00050723 sb zero,14(a0)
> With overlap_op_by_pieces you will get:
> 7e: 00053023 sd zero,0(a0)
> 82: 000533a3 sd zero,7(a0)
>
> BR
> Christoph
>
> >
> >
> > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > This patch adds the field overlap_op_by_pieces to the struct
> > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > feature of the by-pieces infrastructure.
> > >
> > > gcc/ChangeLog:
> > >
> > > * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > (riscv_overlap_op_by_pieces): New function.
> > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > riscv_overlap_op_by_pieces.
> > >
> > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > ---
> > > gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > 1 file changed, 14 insertions(+)
> > >
> > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > index 576960bb37c..824e930ef05 100644
> > > --- a/gcc/config/riscv/riscv.c
> > > +++ b/gcc/config/riscv/riscv.c
> > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > unsigned short branch_cost;
> > > unsigned short memory_cost;
> > > bool slow_unaligned_access;
> > > + bool overlap_op_by_pieces;
> > > };
> > >
> > > /* Information about one micro-arch we know about. */
> > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > > 3, /* branch_cost */
> > > 5, /* memory_cost */
> > > true, /* slow_unaligned_access */
> > > + false, /* overlap_op_by_pieces */
> > > };
> > >
> > > /* Costs to use when optimizing for Sifive 7 Series. */
> > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > > 4, /* branch_cost */
> > > 3, /* memory_cost */
> > > true, /* slow_unaligned_access */
> > > + false, /* overlap_op_by_pieces */
> > > };
> > >
> > > /* Costs to use when optimizing for T-HEAD c906. */
> > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > > 3, /* branch_cost */
> > > 5, /* memory_cost */
> > > false, /* slow_unaligned_access */
> > > + false, /* overlap_op_by_pieces */
> > > };
> > >
> > > /* Costs to use when optimizing for size. */
> > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > > 1, /* branch_cost */
> > > 2, /* memory_cost */
> > > false, /* slow_unaligned_access */
> > > + false, /* overlap_op_by_pieces */
> > > };
> > >
> > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > > return riscv_slow_unaligned_access_p;
> > > }
> > >
> > > +static bool
> > > +riscv_overlap_op_by_pieces (void)
> > > +{
> > > + return tune_param->overlap_op_by_pieces;
> > > +}
> > > +
> > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
> > >
> > > static bool
> > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > >
> > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > +
> > > #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > >
> > > --
> > > 2.31.1
> > >
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
2021-07-22 9:28 ` Kito Cheng
@ 2021-07-22 12:27 ` Christoph Müllner
2021-07-22 12:54 ` Christoph Müllner
0 siblings, 1 reply; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22 12:27 UTC (permalink / raw)
To: Kito Cheng; +Cc: Kito Cheng, GCC Patches, Jim Wilson
On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>
> Sounds like we could just use !tune_param->slow_unaligned_access for
> TARGET_OVERLAP_OP_BY_PIECES_P?
> since it improves both performance and code size if we have cheap
> unaligned accesses.
Fine with me as well.
I'll prepare a v2 that enables overlap_op_by_pieces if
slow_unaligned_access==false.
>
> On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> > >
> > > It's my first time seeing this hook :p Did you mind describing when we
> > > need to set it to true?
> > > I mean when a CPU has some feature then we can/should set it to true?
> >
> > The by-pieces infrastructure allows to inline builtins quite well and
> > uses slow_unaligned_access and overlap_op_by_pieces to tune the
> > emitted instruction sequence.
> >
> > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> > the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> > of overlapping memory accesses.
> >
> > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> > Without overlap_op_by_pieces you will get:
> > 8e: 00053023 sd zero,0(a0)
> > 92: 00052423 sw zero,8(a0)
> > 96: 00051623 sh zero,12(a0)
> > 9a: 00050723 sb zero,14(a0)
> > With overlap_op_by_pieces you will get:
> > 7e: 00053023 sd zero,0(a0)
> > 82: 000533a3 sd zero,7(a0)
> >
> > BR
> > Christoph
> >
> > >
> > >
> > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > This patch adds the field overlap_op_by_pieces to the struct
> > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > > feature of the by-pieces infrastructure.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > > (riscv_overlap_op_by_pieces): New function.
> > > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > > riscv_overlap_op_by_pieces.
> > > >
> > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > > ---
> > > > gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > > 1 file changed, 14 insertions(+)
> > > >
> > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > > index 576960bb37c..824e930ef05 100644
> > > > --- a/gcc/config/riscv/riscv.c
> > > > +++ b/gcc/config/riscv/riscv.c
> > > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > > unsigned short branch_cost;
> > > > unsigned short memory_cost;
> > > > bool slow_unaligned_access;
> > > > + bool overlap_op_by_pieces;
> > > > };
> > > >
> > > > /* Information about one micro-arch we know about. */
> > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > > > 3, /* branch_cost */
> > > > 5, /* memory_cost */
> > > > true, /* slow_unaligned_access */
> > > > + false, /* overlap_op_by_pieces */
> > > > };
> > > >
> > > > /* Costs to use when optimizing for Sifive 7 Series. */
> > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > > > 4, /* branch_cost */
> > > > 3, /* memory_cost */
> > > > true, /* slow_unaligned_access */
> > > > + false, /* overlap_op_by_pieces */
> > > > };
> > > >
> > > > /* Costs to use when optimizing for T-HEAD c906. */
> > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > > > 3, /* branch_cost */
> > > > 5, /* memory_cost */
> > > > false, /* slow_unaligned_access */
> > > > + false, /* overlap_op_by_pieces */
> > > > };
> > > >
> > > > /* Costs to use when optimizing for size. */
> > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > > > 1, /* branch_cost */
> > > > 2, /* memory_cost */
> > > > false, /* slow_unaligned_access */
> > > > + false, /* overlap_op_by_pieces */
> > > > };
> > > >
> > > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > > > return riscv_slow_unaligned_access_p;
> > > > }
> > > >
> > > > +static bool
> > > > +riscv_overlap_op_by_pieces (void)
> > > > +{
> > > > + return tune_param->overlap_op_by_pieces;
> > > > +}
> > > > +
> > > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
> > > >
> > > > static bool
> > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > > #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > > >
> > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > > +
> > > > #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > > >
> > > > --
> > > > 2.31.1
> > > >
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
2021-07-22 12:27 ` Christoph Müllner
@ 2021-07-22 12:54 ` Christoph Müllner
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22 12:54 UTC (permalink / raw)
To: Kito Cheng; +Cc: Kito Cheng, GCC Patches, Jim Wilson
On Thu, Jul 22, 2021 at 2:27 PM Christoph Müllner <cmuellner@gcc.gnu.org> wrote:
>
> On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> >
> > Sounds like we could just use !tune_param->slow_unaligned_access for
> > TARGET_OVERLAP_OP_BY_PIECES_P?
> > since it improves both performance and code size if we have cheap
> > unaligned accesses.
>
> Fine for me as well.
> I'll prepare a v2, that uses enables overlap_op_by_pieces if
> slow_unaligned_access==false.
The new patch can be found here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575832.html
>
> >
> > On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> > > >
> > > > It's my first time seeing this hook :p Did you mind describing when we
> > > > need to set it to true?
> > > > I mean when a CPU has some feature then we can/should set it to true?
> > >
> > > The by-pieces infrastructure allows to inline builtins quite well and
> > > uses slow_unaligned_access and overlap_op_by_pieces to tune the
> > > emitted instruction sequence.
> > >
> > > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> > > the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> > > of overlapping memory accesses.
> > >
> > > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> > > Without overlap_op_by_pieces you will get:
> > > 8e: 00053023 sd zero,0(a0)
> > > 92: 00052423 sw zero,8(a0)
> > > 96: 00051623 sh zero,12(a0)
> > > 9a: 00050723 sb zero,14(a0)
> > > With overlap_op_by_pieces you will get:
> > > 7e: 00053023 sd zero,0(a0)
> > > 82: 000533a3 sd zero,7(a0)
> > >
> > > BR
> > > Christoph
> > >
> > > >
> > > >
> > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > This patch adds the field overlap_op_by_pieces to the struct
> > > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > > > feature of the by-pieces infrastructure.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > > > (riscv_overlap_op_by_pieces): New function.
> > > > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > > > riscv_overlap_op_by_pieces.
> > > > >
> > > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > > > ---
> > > > > gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > > > 1 file changed, 14 insertions(+)
> > > > >
> > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > > > index 576960bb37c..824e930ef05 100644
> > > > > --- a/gcc/config/riscv/riscv.c
> > > > > +++ b/gcc/config/riscv/riscv.c
> > > > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > > > unsigned short branch_cost;
> > > > > unsigned short memory_cost;
> > > > > bool slow_unaligned_access;
> > > > > + bool overlap_op_by_pieces;
> > > > > };
> > > > >
> > > > > /* Information about one micro-arch we know about. */
> > > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > > > > 3, /* branch_cost */
> > > > > 5, /* memory_cost */
> > > > > true, /* slow_unaligned_access */
> > > > > + false, /* overlap_op_by_pieces */
> > > > > };
> > > > >
> > > > > /* Costs to use when optimizing for Sifive 7 Series. */
> > > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > > > > 4, /* branch_cost */
> > > > > 3, /* memory_cost */
> > > > > true, /* slow_unaligned_access */
> > > > > + false, /* overlap_op_by_pieces */
> > > > > };
> > > > >
> > > > > /* Costs to use when optimizing for T-HEAD c906. */
> > > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > > > > 3, /* branch_cost */
> > > > > 5, /* memory_cost */
> > > > > false, /* slow_unaligned_access */
> > > > > + false, /* overlap_op_by_pieces */
> > > > > };
> > > > >
> > > > > /* Costs to use when optimizing for size. */
> > > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > > > > 1, /* branch_cost */
> > > > > 2, /* memory_cost */
> > > > > false, /* slow_unaligned_access */
> > > > > + false, /* overlap_op_by_pieces */
> > > > > };
> > > > >
> > > > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > > > > return riscv_slow_unaligned_access_p;
> > > > > }
> > > > >
> > > > > +static bool
> > > > > +riscv_overlap_op_by_pieces (void)
> > > > > +{
> > > > > + return tune_param->overlap_op_by_pieces;
> > > > > +}
> > > > > +
> > > > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
> > > > >
> > > > > static bool
> > > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > > > #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > > > >
> > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > > > +
> > > > > #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > > > >
> > > > > --
> > > > > 2.31.1
> > > > >
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-07-22 12:54 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-21 23:32 [PATCH] RISC-V: Enable overlap-by-pieces via tune param Christoph Muellner
2021-07-22 8:52 ` Kito Cheng
2021-07-22 9:20 ` Christoph Müllner
2021-07-22 9:28 ` Kito Cheng
2021-07-22 12:27 ` Christoph Müllner
2021-07-22 12:54 ` Christoph Müllner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).