From: Christoph Muellner <christoph.muellner@vrull.eu>
To: gcc-patches@gcc.gnu.org, Kito Cheng <kito.cheng@sifive.com>,
Jim Wilson <jim.wilson.gcc@gmail.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Andrew Waterman <andrew@sifive.com>,
Philipp Tomsich <philipp.tomsich@vrull.eu>,
Jeff Law <jeffreyalaw@gmail.com>,
Vineet Gupta <vineetg@rivosinc.com>
Cc: "Christoph Müllner" <christoph.muellner@vrull.eu>
Subject: [PATCH 3/7] riscv: Enable overlap-by-pieces via tune param
Date: Mon, 14 Nov 2022 00:05:17 +0100 [thread overview]
Message-ID: <20221113230521.712693-4-christoph.muellner@vrull.eu> (raw)
In-Reply-To: <20221113230521.712693-1-christoph.muellner@vrull.eu>
From: Christoph Müllner <christoph.muellner@vrull.eu>
This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param, which makes it possible to enable the
overlap_op_by_pieces infrastructure.
gcc/ChangeLog:
* config/riscv/riscv.cc (struct riscv_tune_param): New field.
(riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
gcc/config/riscv/riscv.cc | 17 +++++-
.../gcc.target/riscv/memcpy-nonoverlapping.c | 54 +++++++++++++++++++
.../gcc.target/riscv/memcpy-overlapping.c | 50 +++++++++++++++++
.../gcc.target/riscv/memset-nonoverlapping.c | 45 ++++++++++++++++
.../gcc.target/riscv/memset-overlapping.c | 43 +++++++++++++++
5 files changed, 208 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
create mode 100644 gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
create mode 100644 gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
create mode 100644 gcc/testsuite/gcc.target/riscv/memset-overlapping.c
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a0c00cfb66f..7357cf51cdf 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -243,6 +243,7 @@ struct riscv_tune_param
unsigned short fmv_cost;
bool slow_unaligned_access;
unsigned int fusible_ops;
+ bool overlap_op_by_pieces;
};
/* Information about one micro-arch we know about. */
@@ -331,6 +332,7 @@ static const struct riscv_tune_param rocket_tune_info = {
8, /* fmv_cost */
true, /* slow_unaligned_access */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -346,6 +348,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
8, /* fmv_cost */
true, /* slow_unaligned_access */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -361,6 +364,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
8, /* fmv_cost */
false, /* slow_unaligned_access */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for size. */
@@ -376,6 +380,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
8, /* fmv_cost */
false, /* slow_unaligned_access */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ false, /* overlap_op_by_pieces */
};
/* Costs to use when optimizing for Ventana Micro VT1. */
@@ -393,7 +398,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH | /* fusible_ops */
RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
- RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
+ RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ),
+ true, /* overlap_op_by_pieces */
};
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -6444,6 +6450,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
return riscv_slow_unaligned_access_p;
}
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+ return tune_param->overlap_op_by_pieces;
+}
+
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
static bool
@@ -6974,6 +6986,9 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
new file mode 100644
index 00000000000..1c99e13fc26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+
+#define COPY_N(N) \
+void copy##N (char *src, char *dst) \
+{ \
+ dst = __builtin_assume_aligned (dst, 4096); \
+ src = __builtin_assume_aligned (src, 4096); \
+ __builtin_memcpy (dst, src, N); \
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}. */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}. */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+ So the code below is emitted via cpymemsi/block_move_straight. */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
new file mode 100644
index 00000000000..ffb7248bfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define COPY_N(N) \
+void copy##N (char *src, char *dst) \
+{ \
+ dst = __builtin_assume_aligned (dst, 4096); \
+ src = __builtin_assume_aligned (src, 4096); \
+ __builtin_memcpy (dst, src, N); \
+}
+
+/* Emits 1x {ld,sd} and 1x {lw,sw}. */
+COPY_N(11)
+
+/* Emits 2x {ld,sd}. */
+COPY_N(13)
+
+/* Emits 2x {ld,sd}. */
+COPY_N(14)
+
+/* Emits 2x {ld,sd}. */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lw,sw}. */
+COPY_N(19)
+
+/* Emits 3x {ld,sd}. */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+ So the code below is emitted via cpymemsi/block_move_straight. */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 2x {lw,sw}. */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 21 } } */
+/* { dg-final { scan-assembler-times "sd\t" 21 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 5 } } */
+/* { dg-final { scan-assembler-times "sw\t" 5 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 2 } } */
+/* { dg-final { scan-assembler-times "sb\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
new file mode 100644
index 00000000000..c4311c7a8d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N) \
+void zero##N (char *dst) \
+{ \
+ dst = __builtin_assume_aligned (dst, 4096); \
+ __builtin_memset (dst, 0, N); \
+}
+
+/* Emits 1x sd and 1x {sh,sb}. */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}. */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}. */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}. */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}. */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}. */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+ So the code below is emitted via cpymemsi/block_move_straight. */
+
+/* Emits 3x sd and 1x {sh,sb}. */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}. */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}. */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
new file mode 100644
index 00000000000..793766b5262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+#define ZERO_N(N) \
+void zero##N (char *dst) \
+{ \
+ dst = __builtin_assume_aligned (dst, 4096); \
+ __builtin_memset (dst, 0, N); \
+}
+
+/* Emits 1x sd and 1x sw. */
+ZERO_N(11)
+
+/* Emits 2x sd. */
+ZERO_N(13)
+
+/* Emits 2x sd. */
+ZERO_N(14)
+
+/* Emits 2x sd. */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x sw. */
+ZERO_N(19)
+
+/* Emits 3x sd. */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+ So the code below is emitted via cpymemsi/block_move_straight. */
+
+/* Emits 3x sd and 1x sw. */
+ZERO_N(27)
+
+/* Emits 4x sd. */
+ZERO_N(29)
+
+/* Emits 4x sd. */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 23 } } */
+/* { dg-final { scan-assembler-times "sw\t" 3 } } */
--
2.38.1
next prev parent reply other threads:[~2022-11-13 23:05 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-13 23:05 [PATCH 0/7] riscv: Improve builtins expansion Christoph Muellner
2022-11-13 23:05 ` [PATCH 1/7] riscv: bitmanip: add orc.b as an unspec Christoph Muellner
2022-11-14 16:51 ` Jeff Law
2022-11-14 17:53 ` Jeff Law
2022-11-14 19:05 ` Philipp Tomsich
2022-11-13 23:05 ` [PATCH 2/7] riscv: bitmanip/zbb: Add prefix/postfix and enable visiblity Christoph Muellner
2022-11-14 16:55 ` Jeff Law
2022-11-13 23:05 ` Christoph Muellner [this message]
2022-11-14 2:48 ` [PATCH 3/7] riscv: Enable overlap-by-pieces via tune param Vineet Gupta
2022-11-14 7:59 ` Philipp Tomsich
2022-11-14 8:29 ` Christoph Müllner
2022-11-14 19:04 ` Jeff Law
2022-11-14 19:07 ` Christoph Müllner
2022-11-13 23:05 ` [PATCH 4/7] riscv: Move riscv_block_move_loop to separate file Christoph Muellner
2022-11-14 16:56 ` Jeff Law
2022-11-13 23:05 ` [PATCH 5/7] riscv: Use by-pieces to do overlapping accesses in block_move_straight Christoph Muellner
2022-11-14 17:16 ` Jeff Law
2022-11-14 19:01 ` Christoph Müllner
2022-11-14 19:05 ` Jeff Law
2022-11-13 23:05 ` [PATCH 6/7] riscv: Add support for strlen inline expansion Christoph Muellner
2022-11-14 18:17 ` Jeff Law
2022-11-14 21:07 ` Christoph Müllner
2022-11-13 23:05 ` [PATCH 7/7] riscv: Add support for str(n)cmp " Christoph Muellner
2022-11-14 19:28 ` Jeff Law
2022-11-14 21:49 ` Christoph Müllner
2022-11-15 0:22 ` Jeff Law
2022-11-15 0:46 ` Kito Cheng
2022-11-15 0:53 ` Palmer Dabbelt
2022-11-15 1:55 ` Kito Cheng
2022-11-15 3:41 ` Jeff Law
2022-11-15 22:22 ` Christoph Müllner
2022-11-16 0:15 ` Philipp Tomsich
2022-11-21 3:24 ` Kito Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221113230521.712693-4-christoph.muellner@vrull.eu \
--to=christoph.muellner@vrull.eu \
--cc=andrew@sifive.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=jim.wilson.gcc@gmail.com \
--cc=kito.cheng@sifive.com \
--cc=palmer@dabbelt.com \
--cc=philipp.tomsich@vrull.eu \
--cc=vineetg@rivosinc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).