From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id AF49B384F6C0; Fri, 18 Nov 2022 20:23:43 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AF49B384F6C0 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668803023; bh=ywcQH1EORXCrkhQGMkF+YWYm8XguX6akZbuuD0iyoxs=; h=From:To:Subject:Date:From; b=iF0RWSY3fxc62K/5BdkEfdzrXNGwzLHCrPHC42pernhdvsy2huSljhzd26Hk3R+BZ kyPjOflshqrtzCFFA+H23r22Z7HOSB15wzKsIfVtGGPqW/1L1d0XmT2Pnr64x4PB1H pXUz2VkUa0Q6kINON2C2xCgqGQnyKrGTYBTYOybA= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Enable overlap-by-pieces via tune param X-Act-Checkin: gcc X-Git-Author: =?utf-8?q?Christoph_M=C3=BCllner?= X-Git-Refname: refs/vendors/vrull/heads/for-upstream X-Git-Oldrev: 74922e227e22a451c2ddcd9abf5b7fb4f16da2a5 X-Git-Newrev: 8f6a6b1e2336d737889103ec28c2a42f52037423 Message-Id: <20221118202343.AF49B384F6C0@sourceware.org> Date: Fri, 18 Nov 2022 20:23:43 +0000 (GMT) List-Id: https://gcc.gnu.org/g:8f6a6b1e2336d737889103ec28c2a42f52037423 commit 8f6a6b1e2336d737889103ec28c2a42f52037423 Author: Christoph Müllner Date: Wed Oct 5 02:10:14 2022 +0200 riscv: Enable overlap-by-pieces via tune param This patch adds the field overlap_op_by_pieces to the struct riscv_tune_param, which allows enabling the overlap_op_by_pieces infrastructure. gcc/ChangeLog: * config/riscv/riscv.cc (struct riscv_tune_param): New field. (riscv_overlap_op_by_pieces): New function. (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to riscv_overlap_op_by_pieces. 
Signed-off-by: Christoph Müllner Diff: --- gcc/config/riscv/riscv.cc | 17 ++++++- .../gcc.target/riscv/memcpy-nonoverlapping.c | 53 ++++++++++++++++++++++ .../riscv/memcpy-overlapping-strictalign.c | 53 ++++++++++++++++++++++ .../gcc.target/riscv/memcpy-overlapping.c | 50 ++++++++++++++++++++ .../gcc.target/riscv/memset-nonoverlapping.c | 45 ++++++++++++++++++ .../gcc.target/riscv/memset-overlapping.c | 43 ++++++++++++++++++ 6 files changed, 260 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 69e86a59f60..2d973c38ab4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -250,6 +250,7 @@ struct riscv_tune_param unsigned short fmv_cost; bool slow_unaligned_access; unsigned int fusible_ops; + bool overlap_op_by_pieces; }; /* Information about one micro-arch we know about. */ @@ -338,6 +339,7 @@ static const struct riscv_tune_param rocket_tune_info = { 8, /* fmv_cost */ true, /* slow_unaligned_access */ RISCV_FUSE_NOTHING, /* fusible_ops */ + false, /* overlap_op_by_pieces */ }; /* Costs to use when optimizing for Sifive 7 Series. */ @@ -353,6 +355,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { 8, /* fmv_cost */ true, /* slow_unaligned_access */ RISCV_FUSE_NOTHING, /* fusible_ops */ + false, /* overlap_op_by_pieces */ }; /* Costs to use when optimizing for T-HEAD c906. */ @@ -368,6 +371,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { 8, /* fmv_cost */ false, /* slow_unaligned_access */ RISCV_FUSE_NOTHING, /* fusible_ops */ + false, /* overlap_op_by_pieces */ }; /* Costs to use when optimizing for size. */ @@ -383,6 +387,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { 8, /* fmv_cost */ false, /* slow_unaligned_access */ RISCV_FUSE_NOTHING, /* fusible_ops */ + false, /* overlap_op_by_pieces */ }; /* Costs to use when optimizing for Ventana Micro VT1. 
*/ @@ -400,7 +405,8 @@ static const struct riscv_tune_param ventana_vt1_tune_info = { ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH | /* fusible_ops */ RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED | RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI | - RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ) + RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ), + true, /* overlap_op_by_pieces */ }; static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); @@ -6649,6 +6655,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int) return riscv_slow_unaligned_access_p; } +static bool +riscv_overlap_op_by_pieces (void) +{ + return tune_param->overlap_op_by_pieces && !TARGET_STRICT_ALIGN; +} + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ static bool @@ -7212,6 +7224,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask) #undef TARGET_SLOW_UNALIGNED_ACCESS #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access +#undef TARGET_OVERLAP_OP_BY_PIECES_P +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces + #undef TARGET_SECONDARY_MEMORY_NEEDED #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c new file mode 100644 index 00000000000..fb84d14e505 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/memcpy-nonoverlapping.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +#define COPY_N(N) \ +void copy##N (char *src, char *dst) \ +{ \ + dst = __builtin_assume_aligned (dst, 4096); \ + src = __builtin_assume_aligned (src, 4096); \ + __builtin_memcpy (dst, src, N); \ +} + +/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(11) + +/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}. */ +COPY_N(13) + +/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}. */ +COPY_N(14) + +/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. 
*/ +COPY_N(15) + +/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(19) + +/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */ +COPY_N(23) + +/* The by-pieces infrastructure handles up to 24 bytes. + So the code below is emitted via cpymemsi/block_move_straight. */ + +/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(27) + +/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */ +COPY_N(29) + +/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */ +COPY_N(31) + +/* { dg-final { scan-assembler-times "ld\t" 17 } } */ +/* { dg-final { scan-assembler-times "sd\t" 17 } } */ + +/* { dg-final { scan-assembler-times "lw\t" 6 } } */ +/* { dg-final { scan-assembler-times "sw\t" 6 } } */ + +/* { dg-final { scan-assembler-times "lhu\t" 7 } } */ +/* { dg-final { scan-assembler-times "sh\t" 7 } } */ + +/* { dg-final { scan-assembler-times "lbu\t" 8 } } */ +/* { dg-final { scan-assembler-times "sb\t" 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c new file mode 100644 index 00000000000..a3ad971edb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping-strictalign.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64 -mstrict-align" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +#define COPY_N(N) \ +void copy##N (char *src, char *dst) \ +{ \ + dst = __builtin_assume_aligned (dst, 4096); \ + src = __builtin_assume_aligned (src, 4096); \ + __builtin_memcpy (dst, src, N); \ +} + +/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(11) + +/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}. */ +COPY_N(13) + +/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}. */ +COPY_N(14) + +/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */ +COPY_N(15) + +/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(19) + +/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. 
*/ +COPY_N(23) + +/* The by-pieces infrastructure handles up to 24 bytes. + So the code below is emitted via cpymemsi/block_move_straight. */ + +/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(27) + +/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */ +COPY_N(29) + +/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */ +COPY_N(31) + +/* { dg-final { scan-assembler-times "ld\t" 17 } } */ +/* { dg-final { scan-assembler-times "sd\t" 17 } } */ + +/* { dg-final { scan-assembler-times "lw\t" 6 } } */ +/* { dg-final { scan-assembler-times "sw\t" 6 } } */ + +/* { dg-final { scan-assembler-times "lhu\t" 7 } } */ +/* { dg-final { scan-assembler-times "sh\t" 7 } } */ + +/* { dg-final { scan-assembler-times "lbu\t" 8 } } */ +/* { dg-final { scan-assembler-times "sb\t" 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c new file mode 100644 index 00000000000..ffb7248bfd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/memcpy-overlapping.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +#define COPY_N(N) \ +void copy##N (char *src, char *dst) \ +{ \ + dst = __builtin_assume_aligned (dst, 4096); \ + src = __builtin_assume_aligned (src, 4096); \ + __builtin_memcpy (dst, src, N); \ +} + +/* Emits 1x {ld,sd} and 1x {lw,sw}. */ +COPY_N(11) + +/* Emits 2x {ld,sd}. */ +COPY_N(13) + +/* Emits 2x {ld,sd}. */ +COPY_N(14) + +/* Emits 2x {ld,sd}. */ +COPY_N(15) + +/* Emits 2x {ld,sd} and 1x {lw,sw}. */ +COPY_N(19) + +/* Emits 3x ld and 3x sd. */ +COPY_N(23) + +/* The by-pieces infrastructure handles up to 24 bytes. + So the code below is emitted via cpymemsi/block_move_straight. */ + +/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */ +COPY_N(27) + +/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */ +COPY_N(29) + +/* Emits 3x {ld,sd} and 2x {lw,sw}. 
*/ +COPY_N(31) + +/* { dg-final { scan-assembler-times "ld\t" 21 } } */ +/* { dg-final { scan-assembler-times "sd\t" 21 } } */ + +/* { dg-final { scan-assembler-times "lw\t" 5 } } */ +/* { dg-final { scan-assembler-times "sw\t" 5 } } */ + +/* { dg-final { scan-assembler-times "lbu\t" 2 } } */ +/* { dg-final { scan-assembler-times "sb\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c new file mode 100644 index 00000000000..c4311c7a8d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/memset-nonoverlapping.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +#define ZERO_N(N) \ +void zero##N (char *dst) \ +{ \ + dst = __builtin_assume_aligned (dst, 4096); \ + __builtin_memset (dst, 0, N); \ +} + +/* Emits 1x sd and 1x {sh,sb}. */ +ZERO_N(11) + +/* Emits 1x sd and 1x {sw,sb}. */ +ZERO_N(13) + +/* Emits 1x sd and 1x {sw,sh}. */ +ZERO_N(14) + +/* Emits 1x sd and 1x {sw,sh,sb}. */ +ZERO_N(15) + +/* Emits 2x sd and 1x {sh,sb}. */ +ZERO_N(19) + +/* Emits 2x sd and 1x {sw,sh,sb}. */ +ZERO_N(23) + +/* The by-pieces infrastructure handles up to 24 bytes. + So the code below is emitted via cpymemsi/block_move_straight. */ + +/* Emits 3x sd and 1x {sh,sb}. */ +ZERO_N(27) + +/* Emits 3x sd and 1x {sw,sb}. */ +ZERO_N(29) + +/* Emits 3x sd and 1x {sw,sh,sb}. 
*/ +ZERO_N(31) + +/* { dg-final { scan-assembler-times "sd\t" 17 } } */ +/* { dg-final { scan-assembler-times "sw\t" 6 } } */ +/* { dg-final { scan-assembler-times "sh\t" 7 } } */ +/* { dg-final { scan-assembler-times "sb\t" 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/memset-overlapping.c b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c new file mode 100644 index 00000000000..793766b5262 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/memset-overlapping.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=ventana-vt1 -march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +#define ZERO_N(N) \ +void zero##N (char *dst) \ +{ \ + dst = __builtin_assume_aligned (dst, 4096); \ + __builtin_memset (dst, 0, N); \ +} + +/* Emits 1x sd and 1x sw. */ +ZERO_N(11) + +/* Emits 2x sd. */ +ZERO_N(13) + +/* Emits 2x sd. */ +ZERO_N(14) + +/* Emits 2x sd. */ +ZERO_N(15) + +/* Emits 2x sd and 1x sw. */ +ZERO_N(19) + +/* Emits 3x sd. */ +ZERO_N(23) + +/* The by-pieces infrastructure handles up to 24 bytes. + So the code below is emitted via cpymemsi/block_move_straight. */ + +/* Emits 3x sd and 1x sw. */ +ZERO_N(27) + +/* Emits 4x sd. */ +ZERO_N(29) + +/* Emits 4x sd. */ +ZERO_N(31) + +/* { dg-final { scan-assembler-times "sd\t" 23 } } */ +/* { dg-final { scan-assembler-times "sw\t" 3 } } */