public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Enable overlap-by-pieces via tune param
@ 2021-07-21 23:32 Christoph Muellner
  2021-07-22  8:52 ` Kito Cheng
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Muellner @ 2021-07-21 23:32 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jim Wilson, Kito Cheng, Christoph Muellner

This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param, which makes it possible to enable the
overlap_op_by_pieces feature of the by-pieces infrastructure.

gcc/ChangeLog:

	* config/riscv/riscv.c (struct riscv_tune_param): New field.
	(riscv_overlap_op_by_pieces): New function.
	(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
	riscv_overlap_op_by_pieces.

Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
---
 gcc/config/riscv/riscv.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..824e930ef05 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -220,6 +220,7 @@ struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   3,						/* branch_cost */
   5,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   4,						/* branch_cost */
   3,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   3,            /* branch_cost */
   5,            /* memory_cost */
   false,            /* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   1,						/* branch_cost */
   2,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
  2021-07-21 23:32 [PATCH] RISC-V: Enable overlap-by-pieces via tune param Christoph Muellner
@ 2021-07-22  8:52 ` Kito Cheng
  2021-07-22  9:20   ` Christoph Müllner
  0 siblings, 1 reply; 6+ messages in thread
From: Kito Cheng @ 2021-07-22  8:52 UTC (permalink / raw)
  To: Christoph Muellner; +Cc: GCC Patches, Kito Cheng

It's my first time seeing this hook :p Would you mind describing when we
need to set it to true?
I mean, is there some CPU feature that means we can/should set it to true?


On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch adds the field overlap_op_by_pieces to the struct
> riscv_tune_param, which allows to enable the overlap_op_by_pieces
> feature of the by-pieces infrastructure.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.c (struct riscv_tune_param): New field.
>         (riscv_overlap_op_by_pieces): New function.
>         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
>         riscv_overlap_op_by_pieces.
>
> Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> ---
>  gcc/config/riscv/riscv.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 576960bb37c..824e930ef05 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -220,6 +220,7 @@ struct riscv_tune_param
>    unsigned short branch_cost;
>    unsigned short memory_cost;
>    bool slow_unaligned_access;
> +  bool overlap_op_by_pieces;
>  };
>
>  /* Information about one micro-arch we know about.  */
> @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>    3,                                           /* branch_cost */
>    5,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  false,                                       /* overlap_op_by_pieces */
>  };
>
>  /* Costs to use when optimizing for Sifive 7 Series.  */
> @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
>    4,                                           /* branch_cost */
>    3,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  false,                                       /* overlap_op_by_pieces */
>  };
>
>  /* Costs to use when optimizing for T-HEAD c906.  */
> @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
>    3,            /* branch_cost */
>    5,            /* memory_cost */
>    false,            /* slow_unaligned_access */
> +  false,                                       /* overlap_op_by_pieces */
>  };
>
>  /* Costs to use when optimizing for size.  */
> @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>    1,                                           /* branch_cost */
>    2,                                           /* memory_cost */
>    false,                                       /* slow_unaligned_access */
> +  false,                                       /* overlap_op_by_pieces */
>  };
>
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
>    return riscv_slow_unaligned_access_p;
>  }
>
> +static bool
> +riscv_overlap_op_by_pieces (void)
> +{
> +  return tune_param->overlap_op_by_pieces;
> +}
> +
>  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
>
>  static bool
> @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
>  #undef TARGET_SLOW_UNALIGNED_ACCESS
>  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
>
> +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> +
>  #undef TARGET_SECONDARY_MEMORY_NEEDED
>  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
>
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
  2021-07-22  8:52 ` Kito Cheng
@ 2021-07-22  9:20   ` Christoph Müllner
  2021-07-22  9:28     ` Kito Cheng
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22  9:20 UTC (permalink / raw)
  To: Kito Cheng; +Cc: GCC Patches, Kito Cheng

On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>
> It's my first time seeing this hook :p Did you mind describing when we
> need to set it to true?
> I mean when a CPU has some feature then we can/should set it to true?

The by-pieces infrastructure allows inlining builtins quite well and
uses slow_unaligned_access and overlap_op_by_pieces to tune the
emitted instruction sequence.

In case unaligned accesses are fast, overlap_op_by_pieces can reduce
the number of instructions (emitted by by-pieces for e.g. memset) at the cost
of overlapping memory accesses.

E.g. you want to clear 15 bytes with memset (and cheap unaligned accesses):
Without overlap_op_by_pieces you will get:
  8e:   00053023                sd      zero,0(a0)
  92:   00052423                sw      zero,8(a0)
  96:   00051623                sh      zero,12(a0)
  9a:   00050723                sb      zero,14(a0)
With overlap_op_by_pieces you will get:
  7e:   00053023                sd      zero,0(a0)
  82:   000533a3                sd      zero,7(a0)

BR
Christoph

>
>
> On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > This patch adds the field overlap_op_by_pieces to the struct
> > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > feature of the by-pieces infrastructure.
> >
> > gcc/ChangeLog:
> >
> >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> >         (riscv_overlap_op_by_pieces): New function.
> >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> >         riscv_overlap_op_by_pieces.
> >
> > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > ---
> >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> >  1 file changed, 14 insertions(+)
> >
> > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > index 576960bb37c..824e930ef05 100644
> > --- a/gcc/config/riscv/riscv.c
> > +++ b/gcc/config/riscv/riscv.c
> > @@ -220,6 +220,7 @@ struct riscv_tune_param
> >    unsigned short branch_cost;
> >    unsigned short memory_cost;
> >    bool slow_unaligned_access;
> > +  bool overlap_op_by_pieces;
> >  };
> >
> >  /* Information about one micro-arch we know about.  */
> > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> >    3,                                           /* branch_cost */
> >    5,                                           /* memory_cost */
> >    true,                                                /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> >    4,                                           /* branch_cost */
> >    3,                                           /* memory_cost */
> >    true,                                                /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for T-HEAD c906.  */
> > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> >    3,            /* branch_cost */
> >    5,            /* memory_cost */
> >    false,            /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for size.  */
> > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> >    1,                                           /* branch_cost */
> >    2,                                           /* memory_cost */
> >    false,                                       /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> >    return riscv_slow_unaligned_access_p;
> >  }
> >
> > +static bool
> > +riscv_overlap_op_by_pieces (void)
> > +{
> > +  return tune_param->overlap_op_by_pieces;
> > +}
> > +
> >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> >
> >  static bool
> > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> >
> > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > +
> >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> >
> > --
> > 2.31.1
> >

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
  2021-07-22  9:20   ` Christoph Müllner
@ 2021-07-22  9:28     ` Kito Cheng
  2021-07-22 12:27       ` Christoph Müllner
  0 siblings, 1 reply; 6+ messages in thread
From: Kito Cheng @ 2021-07-22  9:28 UTC (permalink / raw)
  To: Christoph Müllner; +Cc: Kito Cheng, GCC Patches, Jim Wilson

Sounds like we could just use !tune_param->slow_unaligned_access for
TARGET_OVERLAP_OP_BY_PIECES_P,
since it improves both performance and code size if we have cheap
unaligned accesses?

On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> >
> > It's my first time seeing this hook :p Did you mind describing when we
> > need to set it to true?
> > I mean when a CPU has some feature then we can/should set it to true?
>
> The by-pieces infrastructure allows to inline builtins quite well and
> uses slow_unaligned_access and overlap_op_by_pieces to tune the
> emitted instruction sequence.
>
> In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> of overlapping memory accesses.
>
> E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> Without overlap_op_by_pieces you will get:
>   8e:   00053023                sd      zero,0(a0)
>   92:   00052423                sw      zero,8(a0)
>   96:   00051623                sh      zero,12(a0)
>   9a:   00050723                sb      zero,14(a0)
> With overlap_op_by_pieces you will get:
>   7e:   00053023                sd      zero,0(a0)
>   82:   000533a3                sd      zero,7(a0)
>
> BR
> Christoph
>
> >
> >
> > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > This patch adds the field overlap_op_by_pieces to the struct
> > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > feature of the by-pieces infrastructure.
> > >
> > > gcc/ChangeLog:
> > >
> > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > >         (riscv_overlap_op_by_pieces): New function.
> > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > >         riscv_overlap_op_by_pieces.
> > >
> > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > ---
> > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > >  1 file changed, 14 insertions(+)
> > >
> > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > index 576960bb37c..824e930ef05 100644
> > > --- a/gcc/config/riscv/riscv.c
> > > +++ b/gcc/config/riscv/riscv.c
> > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > >    unsigned short branch_cost;
> > >    unsigned short memory_cost;
> > >    bool slow_unaligned_access;
> > > +  bool overlap_op_by_pieces;
> > >  };
> > >
> > >  /* Information about one micro-arch we know about.  */
> > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > >    3,                                           /* branch_cost */
> > >    5,                                           /* memory_cost */
> > >    true,                                                /* slow_unaligned_access */
> > > +  false,                                       /* overlap_op_by_pieces */
> > >  };
> > >
> > >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > >    4,                                           /* branch_cost */
> > >    3,                                           /* memory_cost */
> > >    true,                                                /* slow_unaligned_access */
> > > +  false,                                       /* overlap_op_by_pieces */
> > >  };
> > >
> > >  /* Costs to use when optimizing for T-HEAD c906.  */
> > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > >    3,            /* branch_cost */
> > >    5,            /* memory_cost */
> > >    false,            /* slow_unaligned_access */
> > > +  false,                                       /* overlap_op_by_pieces */
> > >  };
> > >
> > >  /* Costs to use when optimizing for size.  */
> > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > >    1,                                           /* branch_cost */
> > >    2,                                           /* memory_cost */
> > >    false,                                       /* slow_unaligned_access */
> > > +  false,                                       /* overlap_op_by_pieces */
> > >  };
> > >
> > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > >    return riscv_slow_unaligned_access_p;
> > >  }
> > >
> > > +static bool
> > > +riscv_overlap_op_by_pieces (void)
> > > +{
> > > +  return tune_param->overlap_op_by_pieces;
> > > +}
> > > +
> > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> > >
> > >  static bool
> > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > >
> > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > +
> > >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > >
> > > --
> > > 2.31.1
> > >

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
  2021-07-22  9:28     ` Kito Cheng
@ 2021-07-22 12:27       ` Christoph Müllner
  2021-07-22 12:54         ` Christoph Müllner
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22 12:27 UTC (permalink / raw)
  To: Kito Cheng; +Cc: Kito Cheng, GCC Patches, Jim Wilson

On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>
> Sounds like we could just use !tune_param->slow_unaligned_access for
> TARGET_OVERLAP_OP_BY_PIECES_P?
> since it improves both performance and code size if we have cheap
> unaligned accesses.

Fine for me as well.
I'll prepare a v2 that enables overlap_op_by_pieces if
slow_unaligned_access==false.

>
> On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> > >
> > > It's my first time seeing this hook :p Did you mind describing when we
> > > need to set it to true?
> > > I mean when a CPU has some feature then we can/should set it to true?
> >
> > The by-pieces infrastructure allows to inline builtins quite well and
> > uses slow_unaligned_access and overlap_op_by_pieces to tune the
> > emitted instruction sequence.
> >
> > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> > the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> > of overlapping memory accesses.
> >
> > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> > Without overlap_op_by_pieces you will get:
> >   8e:   00053023                sd      zero,0(a0)
> >   92:   00052423                sw      zero,8(a0)
> >   96:   00051623                sh      zero,12(a0)
> >   9a:   00050723                sb      zero,14(a0)
> > With overlap_op_by_pieces you will get:
> >   7e:   00053023                sd      zero,0(a0)
> >   82:   000533a3                sd      zero,7(a0)
> >
> > BR
> > Christoph
> >
> > >
> > >
> > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > This patch adds the field overlap_op_by_pieces to the struct
> > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > > feature of the by-pieces infrastructure.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > >         (riscv_overlap_op_by_pieces): New function.
> > > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > >         riscv_overlap_op_by_pieces.
> > > >
> > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > > ---
> > > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > >  1 file changed, 14 insertions(+)
> > > >
> > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > > index 576960bb37c..824e930ef05 100644
> > > > --- a/gcc/config/riscv/riscv.c
> > > > +++ b/gcc/config/riscv/riscv.c
> > > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > >    unsigned short branch_cost;
> > > >    unsigned short memory_cost;
> > > >    bool slow_unaligned_access;
> > > > +  bool overlap_op_by_pieces;
> > > >  };
> > > >
> > > >  /* Information about one micro-arch we know about.  */
> > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > > >    3,                                           /* branch_cost */
> > > >    5,                                           /* memory_cost */
> > > >    true,                                                /* slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > > >    4,                                           /* branch_cost */
> > > >    3,                                           /* memory_cost */
> > > >    true,                                                /* slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for T-HEAD c906.  */
> > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > > >    3,            /* branch_cost */
> > > >    5,            /* memory_cost */
> > > >    false,            /* slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for size.  */
> > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > > >    1,                                           /* branch_cost */
> > > >    2,                                           /* memory_cost */
> > > >    false,                                       /* slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces */
> > > >  };
> > > >
> > > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > > >    return riscv_slow_unaligned_access_p;
> > > >  }
> > > >
> > > > +static bool
> > > > +riscv_overlap_op_by_pieces (void)
> > > > +{
> > > > +  return tune_param->overlap_op_by_pieces;
> > > > +}
> > > > +
> > > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> > > >
> > > >  static bool
> > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > > >
> > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > > +
> > > >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > > >
> > > > --
> > > > 2.31.1
> > > >

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Enable overlap-by-pieces via tune param
  2021-07-22 12:27       ` Christoph Müllner
@ 2021-07-22 12:54         ` Christoph Müllner
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Müllner @ 2021-07-22 12:54 UTC (permalink / raw)
  To: Kito Cheng; +Cc: Kito Cheng, GCC Patches, Jim Wilson

On Thu, Jul 22, 2021 at 2:27 PM Christoph Müllner <cmuellner@gcc.gnu.org> wrote:
>
> On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> >
> > Sounds like we could just use !tune_param->slow_unaligned_access for
> > TARGET_OVERLAP_OP_BY_PIECES_P?
> > since it improves both performance and code size if we have cheap
> > unaligned accesses.
>
> Fine for me as well.
> I'll prepare a v2, that uses enables overlap_op_by_pieces if
> slow_unaligned_access==false.

The new patch can be found here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575832.html

>
> >
> > On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
> > > >
> > > > It's my first time seeing this hook :p Did you mind describing when we
> > > > need to set it to true?
> > > > I mean when a CPU has some feature then we can/should set it to true?
> > >
> > > The by-pieces infrastructure allows to inline builtins quite well and
> > > uses slow_unaligned_access and overlap_op_by_pieces to tune the
> > > emitted instruction sequence.
> > >
> > > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce
> > > the number of instructions (emitted by by-pieces for e.g. memset) for the cost
> > > of overlapping memory accesses.
> > >
> > > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):
> > > Without overlap_op_by_pieces you will get:
> > >   8e:   00053023                sd      zero,0(a0)
> > >   92:   00052423                sw      zero,8(a0)
> > >   96:   00051623                sh      zero,12(a0)
> > >   9a:   00050723                sb      zero,14(a0)
> > > With overlap_op_by_pieces you will get:
> > >   7e:   00053023                sd      zero,0(a0)
> > >   82:   000533a3                sd      zero,7(a0)
> > >
> > > BR
> > > Christoph
> > >
> > > >
> > > >
> > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > This patch adds the field overlap_op_by_pieces to the struct
> > > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > > > > feature of the by-pieces infrastructure.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > > >         (riscv_overlap_op_by_pieces): New function.
> > > > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > > >         riscv_overlap_op_by_pieces.
> > > > >
> > > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > > > > ---
> > > > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > > >  1 file changed, 14 insertions(+)
> > > > >
> > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > > > index 576960bb37c..824e930ef05 100644
> > > > > --- a/gcc/config/riscv/riscv.c
> > > > > +++ b/gcc/config/riscv/riscv.c
> > > > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > > >    unsigned short branch_cost;
> > > > >    unsigned short memory_cost;
> > > > >    bool slow_unaligned_access;
> > > > > +  bool overlap_op_by_pieces;
> > > > >  };
> > > > >
> > > > >  /* Information about one micro-arch we know about.  */
> > > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> > > > >    3,                                           /* branch_cost */
> > > > >    5,                                           /* memory_cost */
> > > > >    true,                                                /* slow_unaligned_access */
> > > > > +  false,                                       /* overlap_op_by_pieces */
> > > > >  };
> > > > >
> > > > >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> > > > >    4,                                           /* branch_cost */
> > > > >    3,                                           /* memory_cost */
> > > > >    true,                                                /* slow_unaligned_access */
> > > > > +  false,                                       /* overlap_op_by_pieces */
> > > > >  };
> > > > >
> > > > >  /* Costs to use when optimizing for T-HEAD c906.  */
> > > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> > > > >    3,            /* branch_cost */
> > > > >    5,            /* memory_cost */
> > > > >    false,            /* slow_unaligned_access */
> > > > > +  false,                                       /* overlap_op_by_pieces */
> > > > >  };
> > > > >
> > > > >  /* Costs to use when optimizing for size.  */
> > > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> > > > >    1,                                           /* branch_cost */
> > > > >    2,                                           /* memory_cost */
> > > > >    false,                                       /* slow_unaligned_access */
> > > > > +  false,                                       /* overlap_op_by_pieces */
> > > > >  };
> > > > >
> > > > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> > > > >    return riscv_slow_unaligned_access_p;
> > > > >  }
> > > > >
> > > > > +static bool
> > > > > +riscv_overlap_op_by_pieces (void)
> > > > > +{
> > > > > +  return tune_param->overlap_op_by_pieces;
> > > > > +}
> > > > > +
> > > > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> > > > >
> > > > >  static bool
> > > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > > >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > > > >
> > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > > > +
> > > > >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > > > >
> > > > > --
> > > > > 2.31.1
> > > > >

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-07-22 12:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-21 23:32 [PATCH] RISC-V: Enable overlap-by-pieces via tune param Christoph Muellner
2021-07-22  8:52 ` Kito Cheng
2021-07-22  9:20   ` Christoph Müllner
2021-07-22  9:28     ` Kito Cheng
2021-07-22 12:27       ` Christoph Müllner
2021-07-22 12:54         ` Christoph Müllner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).