public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-15 15:01 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-15 15:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1ab285cf54d5ff31ee03232fe2bb4a978473abfa

commit 1ab285cf54d5ff31ee03232fe2bb4a978473abfa
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to the struct
    riscv_tune_param, which allows enabling the overlap_op_by_pieces
    infrastructure.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0d0ed0ed31b6..82382450a0d1 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -243,6 +243,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -331,6 +332,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -346,6 +348,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -361,6 +364,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -376,6 +380,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -393,7 +398,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6480,6 +6486,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7018,6 +7030,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 000000000000..fb84d14e5058
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 000000000000..a3ad971edb30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 000000000000..ffb7248bfd14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 000000000000..c4311c7a8d03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 000000000000..793766b52626
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-12-01 13:24 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-12-01 13:24 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:bf45c8e6154344175775c532b503d3a12343c013

commit bf45c8e6154344175775c532b503d3a12343c013
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to the struct
    riscv_tune_param, which allows enabling the overlap_op_by_pieces
    infrastructure.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2c4b47dca0e..d7e26e84ca9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -251,6 +251,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -339,6 +340,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -354,6 +356,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -369,6 +372,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -384,6 +388,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -401,7 +406,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6667,6 +6673,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7230,6 +7242,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-18 20:26 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:26 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:72b806ca02701e4db52c6548f50efdb51150dbd0

commit 72b806ca02701e4db52c6548f50efdb51150dbd0
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to the struct
    riscv_tune_param, which allows enabling the overlap_op_by_pieces
    infrastructure.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 69e86a59f60..2d973c38ab4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -250,6 +250,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -338,6 +339,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -353,6 +355,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -368,6 +371,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -383,6 +387,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -400,7 +405,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6649,6 +6655,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7212,6 +7224,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-18 20:23 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8f6a6b1e2336d737889103ec28c2a42f52037423

commit 8f6a6b1e2336d737889103ec28c2a42f52037423
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to struct
    riscv_tune_param, which allows the overlap_op_by_pieces
    infrastructure to be enabled for individual tuning models.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 69e86a59f60..2d973c38ab4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -250,6 +250,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -338,6 +339,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -353,6 +355,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -368,6 +371,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -383,6 +387,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -400,7 +405,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6649,6 +6655,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7212,6 +7224,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-18 11:36 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 11:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5b68949152987bd5307040ede203c1db16b8a0a2

commit 5b68949152987bd5307040ede203c1db16b8a0a2
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to struct
    riscv_tune_param, which allows the overlap_op_by_pieces
    infrastructure to be enabled for individual tuning models.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e4fb53c46f1..f93ffae34c9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -250,6 +250,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -338,6 +339,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -353,6 +355,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -368,6 +371,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -383,6 +387,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -400,7 +405,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6649,6 +6655,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7211,6 +7223,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-17 22:27 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-17 22:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8071a7ddae9c7fd58d78f71359f97b4f8dbc5d83

commit 8071a7ddae9c7fd58d78f71359f97b4f8dbc5d83
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to struct
    riscv_tune_param, which allows the overlap_op_by_pieces
    infrastructure to be enabled for individual tuning models.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e4fb53c46f1..f93ffae34c9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -250,6 +250,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -338,6 +339,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -353,6 +355,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -368,6 +371,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -383,6 +387,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -400,7 +405,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6649,6 +6655,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7211,6 +7223,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param
@ 2022-11-15 14:02 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-15 14:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3bfdd3d858eaabdaff5e4d0b883ae0073bab7740

commit 3bfdd3d858eaabdaff5e4d0b883ae0073bab7740
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Oct 5 02:10:14 2022 +0200

    riscv: Enable overlap-by-pieces via tune param
    
    This patch adds the field overlap_op_by_pieces to the struct
    riscv_tune_param, which allows a tuning to enable the
    overlap_op_by_pieces infrastructure.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): New field.
            (riscv_overlap_op_by_pieces): New function.
            (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
            riscv_overlap_op_by_pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 17 ++++++-
 .../gcc.target/riscv/memcpy-nonoverlapping.c       | 53 ++++++++++++++++++++++
 .../riscv/memcpy-overlapping-strictalign.c         | 53 ++++++++++++++++++++++
 .../gcc.target/riscv/memcpy-overlapping.c          | 50 ++++++++++++++++++++
 .../gcc.target/riscv/memset-nonoverlapping.c       | 45 ++++++++++++++++++
 .../gcc.target/riscv/memset-overlapping.c          | 43 ++++++++++++++++++
 6 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0d0ed0ed31b..82382450a0d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -243,6 +243,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   unsigned int fusible_ops;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -331,6 +332,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -346,6 +348,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -361,6 +364,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -376,6 +380,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -393,7 +398,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
     RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
     RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
-    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+  true,						/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6480,6 +6486,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -7018,6 +7030,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..fb84d14e505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
new file mode 100644
index 00000000000..a3ad971edb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}.  */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N)				\
+void copy##N (char *src, char *dst)		\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  src = __builtin_assume_aligned (src, 4096);	\
+  __builtin_memcpy (dst, src, N);		\
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}.  */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}.  */
+COPY_N(19)
+
+/* Emits 3x ld and 3x sd.  */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}.  */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}.  */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}.  */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x {sh,sb}.  */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}.  */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}.  */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}.  */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x {sh,sb}.  */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}.  */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N)				\
+void zero##N (char *dst)			\
+{						\
+  dst = __builtin_assume_aligned (dst, 4096);	\
+  __builtin_memset (dst, 0, N);			\
+}
+
+/* Emits 1x sd and 1x sw.  */
+ZERO_N(11)
+
+/* Emits 2x sd.  */
+ZERO_N(13)
+
+/* Emits 2x sd.  */
+ZERO_N(14)
+
+/* Emits 2x sd.  */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw.  */
+ZERO_N(19)
+
+/* Emits 3x sd.  */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight.  */
+
+/* Emits 3x sd and 1x sw.  */
+ZERO_N(27)
+
+/* Emits 4x sd.  */
+ZERO_N(29)
+
+/* Emits 4x sd.  */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-12-01 13:24 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-15 15:01 [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param Philipp Tomsich
  -- strict thread matches above, loose matches on Subject: below --
2022-12-01 13:24 Philipp Tomsich
2022-11-18 20:26 Philipp Tomsich
2022-11-18 20:23 Philipp Tomsich
2022-11-18 11:36 Philipp Tomsich
2022-11-17 22:27 Philipp Tomsich
2022-11-15 14:02 Philipp Tomsich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).