public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2 2/5] riscv: vectorized mem* functions
@ 2023-04-21  7:29 Hau Hsu
  0 siblings, 0 replies; 3+ messages in thread
From: Hau Hsu @ 2023-04-21  7:29 UTC (permalink / raw)
  To: libc-alpha, hongrong.hsu, jerry.shih, nick.knight, kito.cheng
  Cc: greentime.hu, alice.chan, andrew, vincent.chen, hau.hsu

From: Jerry Shih <jerry.shih@sifive.com>

This patch proposes implementations of memchr, memcmp, memcpy, memmove,
and memset that leverage the RISC-V V extension (RVV), version 1.0.
These routines assume VLEN is at least 32 bits, as is required by all
currently defined vector extensions, and they support arbitrarily large
VLEN. All implementations work for both RV32 and RV64 platforms, and
make no assumptions about page size.
---
 sysdeps/riscv/rvv/memchr.S  | 63 +++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memcmp.S  | 75 +++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memcpy.S  | 51 +++++++++++++++++++++++++
 sysdeps/riscv/rvv/memmove.S | 72 +++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memset.S  | 51 +++++++++++++++++++++++++
 5 files changed, 312 insertions(+)
 create mode 100644 sysdeps/riscv/rvv/memchr.S
 create mode 100644 sysdeps/riscv/rvv/memcmp.S
 create mode 100644 sysdeps/riscv/rvv/memcpy.S
 create mode 100644 sysdeps/riscv/rvv/memmove.S
 create mode 100644 sysdeps/riscv/rvv/memset.S

diff --git a/sysdeps/riscv/rvv/memchr.S b/sysdeps/riscv/rvv/memchr.S
new file mode 100644
index 0000000000..6981a9f8b0
--- /dev/null
+++ b/sysdeps/riscv/rvv/memchr.S
@@ -0,0 +1,63 @@
+/* RVV versions memchr.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s, c, n); a0 also returns.  */
+#define src_ptr a0
+#define needle a1
+#define remaining a2
+#define result a0
+
+#define chunk_len a3
+#define match_idx a4
+
+#define LMUL_SETTING m8
+#define vec_data v0
+#define vec_mask v8
+
+ENTRY(memchr)
+
+L(loop):
+    /* Request up to `remaining` byte elements for this pass.  */
+    vsetvli zero, remaining, e8, LMUL_SETTING, ta, ma
+    /* Fault-only-first load: vl may come back smaller than requested.  */
+    vle8ff.v vec_data, (src_ptr)
+    vmseq.vx vec_mask, vec_data, needle
+    /* match_idx = index of the first match, or -1 if there is none.  */
+    vfirst.m match_idx, vec_mask
+    bgez match_idx, L(found)
+
+    /* No match: step past the bytes that were actually examined.  */
+    csrr chunk_len, vl
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    bnez remaining, L(loop)
+
+    /* Ran out of bytes without a match.  */
+    li result, 0
+    ret
+
+L(found):
+    add result, src_ptr, match_idx
+    ret
+
+END(memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/riscv/rvv/memcmp.S b/sysdeps/riscv/rvv/memcmp.S
new file mode 100644
index 0000000000..b156ec524c
--- /dev/null
+++ b/sysdeps/riscv/rvv/memcmp.S
@@ -0,0 +1,75 @@
+/* RVV versions memcmp.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s1, s2, n); a0 also returns.  */
+#define lhs_ptr a0
+#define rhs_ptr a1
+#define remaining a2
+#define result a0
+
+#define chunk_len a3
+#define diff_idx a4
+#define lhs_byte a5
+#define rhs_byte a6
+
+#define LMUL_SETTING m8
+#define vec_lhs v0
+#define vec_rhs v8
+#define vec_mask v16
+
+ENTRY(memcmp)
+
+L(loop):
+    /* chunk_len = number of bytes compared in this pass.  */
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_lhs, (lhs_ptr)
+    vle8.v vec_rhs, (rhs_ptr)
+
+    /* diff_idx = index of the first differing byte, or -1 if none.  */
+    vmsne.vv vec_mask, vec_lhs, vec_rhs
+    sub remaining, remaining, chunk_len
+    vfirst.m diff_idx, vec_mask
+    bgez diff_idx, L(found)
+
+    /* Chunks are equal: advance both inputs.  */
+    add rhs_ptr, rhs_ptr, chunk_len
+    add lhs_ptr, lhs_ptr, chunk_len
+    bnez remaining, L(loop)
+
+    li result, 0
+    ret
+
+L(found):
+    /* Return the difference of the two differing bytes (zero-extended).  */
+    add lhs_ptr, lhs_ptr, diff_idx
+    lbu lhs_byte, 0(lhs_ptr)
+    add rhs_ptr, rhs_ptr, diff_idx
+    lbu rhs_byte, 0(rhs_ptr)
+    sub result, lhs_byte, rhs_byte
+    ret
+
+END(memcmp)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
+strong_alias (memcmp, __memcmpeq)
+libc_hidden_def (__memcmpeq)
+
diff --git a/sysdeps/riscv/rvv/memcpy.S b/sysdeps/riscv/rvv/memcpy.S
new file mode 100644
index 0000000000..de790fbe51
--- /dev/null
+++ b/sysdeps/riscv/rvv/memcpy.S
@@ -0,0 +1,51 @@
+/* RVV versions memcpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (dest, src, n).  */
+#define dst_base a0
+#define src_ptr a1
+#define remaining a2
+
+#define chunk_len a3
+#define dst_ptr a4
+
+#define LMUL_SETTING m8
+#define vec_data v0
+
+ENTRY(memcpy)
+
+    /* Walk a copy of the destination so a0 still holds dest on return.  */
+    mv dst_ptr, dst_base
+
+L(loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_data, (src_ptr)
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_ptr)
+    add dst_ptr, dst_ptr, chunk_len
+    bnez remaining, L(loop)
+
+    ret
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/riscv/rvv/memmove.S b/sysdeps/riscv/rvv/memmove.S
new file mode 100644
index 0000000000..ed12744064
--- /dev/null
+++ b/sysdeps/riscv/rvv/memmove.S
@@ -0,0 +1,72 @@
+/* RVV versions memmove.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (dest, src, n).  */
+#define dst_base a0
+#define src_ptr a1
+#define remaining a2
+
+#define chunk_len a3
+#define dst_ptr a4
+#define src_end a5
+#define dst_end a6
+
+#define LMUL_SETTING m8
+#define vec_data v0
+
+ENTRY(memmove)
+
+    /* Walk a copy of the destination so a0 still holds dest on return.  */
+    mv dst_ptr, dst_base
+
+    /* src >= dest: every chunk is loaded before any store can reach it,
+       so ascending order is safe even when the regions overlap.  */
+    bgeu src_ptr, dst_base, L(forward_copy_loop)
+    add src_end, src_ptr, remaining
+    add dst_end, dst_base, remaining
+    /* src < dest < src + n: dest starts inside the source, so copy in
+       descending order.  Otherwise fall through to the forward loop.  */
+    bltu dst_base, src_end, L(backward_copy_loop)
+
+L(forward_copy_loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_data, (src_ptr)
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_ptr)
+    add dst_ptr, dst_ptr, chunk_len
+    bnez remaining, L(forward_copy_loop)
+    ret
+
+L(backward_copy_loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    /* Step both end pointers down to the start of the current chunk.  */
+    sub src_end, src_end, chunk_len
+    sub dst_end, dst_end, chunk_len
+    vle8.v vec_data, (src_end)
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_end)
+    bnez remaining, L(backward_copy_loop)
+    ret
+
+END(memmove)
+libc_hidden_builtin_def (memmove)
diff --git a/sysdeps/riscv/rvv/memset.S b/sysdeps/riscv/rvv/memset.S
new file mode 100644
index 0000000000..3a6c3d0afd
--- /dev/null
+++ b/sysdeps/riscv/rvv/memset.S
@@ -0,0 +1,51 @@
+/* RVV versions memset.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s, c, n).  */
+#define dst_base a0
+#define fill_byte a1
+#define remaining a2
+#define chunk_len a3
+#define dst_ptr a5
+
+#define LMUL_SETTING m8
+#define vec_fill v0
+
+ENTRY(memset)
+
+    /* Walk a copy of the destination so a0 still holds s on return.  */
+    mv dst_ptr, dst_base
+
+    /* Splat the fill byte into the vector register group once, up front.  */
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vmv.v.x vec_fill, fill_byte
+
+L(loop):
+    vse8.v vec_fill, (dst_ptr)
+    sub remaining, remaining, chunk_len
+    add dst_ptr, dst_ptr, chunk_len
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    bnez remaining, L(loop)
+    ret
+
+END(memset)
+libc_hidden_builtin_def (memset)
-- 
2.37.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 2/5] riscv: vectorized mem* functions
  2023-04-21  7:54 ` [PATCH v2 2/5] riscv: vectorized mem* functions Hau Hsu
@ 2023-04-21 12:12   ` Adhemerval Zanella Netto
  0 siblings, 0 replies; 3+ messages in thread
From: Adhemerval Zanella Netto @ 2023-04-21 12:12 UTC (permalink / raw)
  To: Hau Hsu, libc-alpha, hongrong.hsu, jerry.shih, nick.knight, kito.cheng
  Cc: greentime.hu, alice.chan, andrew, vincent.chen



On 21/04/23 04:54, Hau Hsu via Libc-alpha wrote:
> From: Jerry Shih <jerry.shih@sifive.com>
> 
> This patch proposes implementations of memchr, memcmp, memcpy, memmove,
> and memset that leverage the RISC-V V extension (RVV), version 1.0.
> These routines assumes VLEN is at least 32 bits, as is required by all
> currently defined vector extensions, and they support arbitrarily large
> VLEN. All implementations work for both RV32 and RV64 platforms, and
> make no assumptions about page size.

This is not a full review, just some remarks from skimming through the patch.

> ---
>  sysdeps/riscv/rvv/memchr.S  | 63 +++++++++++++++++++++++++++++++
>  sysdeps/riscv/rvv/memcmp.S  | 75 +++++++++++++++++++++++++++++++++++++
>  sysdeps/riscv/rvv/memcpy.S  | 51 +++++++++++++++++++++++++
>  sysdeps/riscv/rvv/memmove.S | 72 +++++++++++++++++++++++++++++++++++
>  sysdeps/riscv/rvv/memset.S  | 51 +++++++++++++++++++++++++
>  5 files changed, 312 insertions(+)
>  create mode 100644 sysdeps/riscv/rvv/memchr.S
>  create mode 100644 sysdeps/riscv/rvv/memcmp.S
>  create mode 100644 sysdeps/riscv/rvv/memcpy.S
>  create mode 100644 sysdeps/riscv/rvv/memmove.S
>  create mode 100644 sysdeps/riscv/rvv/memset.S
> 
> diff --git a/sysdeps/riscv/rvv/memchr.S b/sysdeps/riscv/rvv/memchr.S
> new file mode 100644
> index 0000000000..6981a9f8b0
> --- /dev/null
> +++ b/sysdeps/riscv/rvv/memchr.S
> @@ -0,0 +1,63 @@
> +/* RVV versions memchr.  RISC-V version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Contributed by Jerry Shih <jerry.shih@sifive.com>.

We don't use 'Contributed by' anymore.

> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#define iResult a0
> +
> +#define pSrc a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +#define vMask v8

We avoid using camelCase, even in assembly implementations.

> +
> +ENTRY(memchr)
> +
> +L(loop):
> +    vsetvli zero, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    vle8ff.v vData, (pSrc)
> +    /* Find the iValue inside the loaded data.  */
> +    vmseq.vx vMask, vData, iValue
> +    vfirst.m iTemp, vMask
> +
> +    /* Skip the loop if we find the matched value.  */
> +    bgez iTemp, L(found)
> +
> +    csrr iVL, vl
> +    sub iNum, iNum, iVL
> +    add pSrc, pSrc, iVL
> +
> +    bnez iNum, L(loop)
> +
> +    li iResult, 0
> +    ret
> +
> +L(found):
> +    add iResult, pSrc, iTemp
> +    ret
> +
> +END(memchr)
> +libc_hidden_builtin_def (memchr)
> diff --git a/sysdeps/riscv/rvv/memcmp.S b/sysdeps/riscv/rvv/memcmp.S
> new file mode 100644
> index 0000000000..b156ec524c
> --- /dev/null
> +++ b/sysdeps/riscv/rvv/memcmp.S
> @@ -0,0 +1,75 @@
> +/* RVV versions memcmp.  RISC-V version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Contributed by Jerry Shih <jerry.shih@sifive.com>.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#define iResult a0
> +
> +#define pSrc1 a0
> +#define pSrc2 a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define iTemp1 a5
> +#define iTemp2 a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData1 v0
> +#define vData2 v8
> +#define vMask v16
> +
> +ENTRY(memcmp)
> +
> +L(loop):
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    vle8.v vData1, (pSrc1)
> +    vle8.v vData2, (pSrc2)
> +
> +    vmsne.vv vMask, vData1, vData2
> +    sub iNum, iNum, iVL
> +    vfirst.m iTemp, vMask
> +
> +    /* Skip the loop if we find the different value between pSrc1 and pSrc2.  */
> +    bgez iTemp, L(found)
> +
> +    add pSrc1, pSrc1, iVL
> +    add pSrc2, pSrc2, iVL
> +
> +    bnez iNum, L(loop)
> +
> +    li iResult, 0
> +    ret
> +
> +L(found):
> +    add pSrc1, pSrc1, iTemp
> +    add pSrc2, pSrc2, iTemp
> +    lbu iTemp1, 0(pSrc1)
> +    lbu iTemp2, 0(pSrc2)
> +    sub iResult, iTemp1, iTemp2
> +    ret
> +
> +END(memcmp)
> +libc_hidden_builtin_def (memcmp)
> +weak_alias (memcmp,bcmp)
> +strong_alias (memcmp, __memcmpeq)
> +libc_hidden_def (__memcmpeq)
> +
> diff --git a/sysdeps/riscv/rvv/memcpy.S b/sysdeps/riscv/rvv/memcpy.S
> new file mode 100644
> index 0000000000..de790fbe51
> --- /dev/null
> +++ b/sysdeps/riscv/rvv/memcpy.S
> @@ -0,0 +1,51 @@
> +/* RVV versions memcpy.  RISC-V version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Contributed by Jerry Shih <jerry.shih@sifive.com>.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +ENTRY(memcpy)
> +
> +    mv pDstPtr, pDst
> +
> +L(loop):
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    vle8.v vData, (pSrc)
> +    sub iNum, iNum, iVL
> +    add pSrc, pSrc, iVL
> +    vse8.v vData, (pDstPtr)
> +    add pDstPtr, pDstPtr, iVL
> +
> +    bnez iNum, L(loop)
> +
> +    ret
> +
> +END(memcpy)
> +libc_hidden_builtin_def (memcpy)
> diff --git a/sysdeps/riscv/rvv/memmove.S b/sysdeps/riscv/rvv/memmove.S
> new file mode 100644
> index 0000000000..ed12744064
> --- /dev/null
> +++ b/sysdeps/riscv/rvv/memmove.S
> @@ -0,0 +1,72 @@
> +/* RVV versions memmove.  RISC-V version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Contributed by Jerry Shih <jerry.shih@sifive.com>.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +#define pSrcBackwardPtr a5
> +#define pDstBackwardPtr a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +ENTRY(memmove)
> +
> +    mv pDstPtr, pDst
> +
> +    /* If pSrc is equal or after pDst, all data in pSrc will be loaded before
> +       overwrited for the overlapping case. We could use faster `forward-copy`.  */
> +    bgeu pSrc, pDst, L(forward_copy_loop)
> +    add pSrcBackwardPtr, pSrc, iNum
> +    add pDstBackwardPtr, pDst, iNum
> +    /* If pDst inside source data range, we need to use `backward_copy_loop` to
> +       handle the overlapping issue.  */
> +    bltu pDst, pSrcBackwardPtr, L(backward_copy_loop)
> +
> +L(forward_copy_loop):
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    vle8.v vData, (pSrc)
> +    sub iNum, iNum, iVL
> +    add pSrc, pSrc, iVL
> +    vse8.v vData, (pDstPtr)
> +    add pDstPtr, pDstPtr, iVL
> +
> +    bnez iNum, L(forward_copy_loop)
> +    ret
> +
> +L(backward_copy_loop):
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
> +    vle8.v vData, (pSrcBackwardPtr)
> +    sub iNum, iNum, iVL
> +    sub pDstBackwardPtr, pDstBackwardPtr, iVL
> +    vse8.v vData, (pDstBackwardPtr)
> +    bnez iNum, L(backward_copy_loop)
> +    ret
> +
> +END(memmove)
> +libc_hidden_builtin_def (memmove)
> diff --git a/sysdeps/riscv/rvv/memset.S b/sysdeps/riscv/rvv/memset.S
> new file mode 100644
> index 0000000000..3a6c3d0afd
> --- /dev/null
> +++ b/sysdeps/riscv/rvv/memset.S
> @@ -0,0 +1,51 @@
> +/* RVV versions memset.  RISC-V version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Contributed by Jerry Shih <jerry.shih@sifive.com>.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#define pDst a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define pDstPtr a5
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +ENTRY(memset)
> +
> +    mv pDstPtr, pDst
> +
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +    vmv.v.x vData, iValue
> +
> +L(loop):
> +    vse8.v vData, (pDstPtr)
> +    sub iNum, iNum, iVL
> +    add pDstPtr, pDstPtr, iVL
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +    bnez iNum, L(loop)
> +
> +    ret
> +
> +END(memset)
> +libc_hidden_builtin_def (memset)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 2/5] riscv: vectorized mem* functions
  2023-04-21  7:54 [PATCH v2 0/5] riscv: Vectorized mem*/str* function Hau Hsu
@ 2023-04-21  7:54 ` Hau Hsu
  2023-04-21 12:12   ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 3+ messages in thread
From: Hau Hsu @ 2023-04-21  7:54 UTC (permalink / raw)
  To: libc-alpha, hongrong.hsu, jerry.shih, nick.knight, kito.cheng
  Cc: greentime.hu, alice.chan, andrew, vincent.chen, hau.hsu

From: Jerry Shih <jerry.shih@sifive.com>

This patch proposes implementations of memchr, memcmp, memcpy, memmove,
and memset that leverage the RISC-V V extension (RVV), version 1.0.
These routines assume VLEN is at least 32 bits, as is required by all
currently defined vector extensions, and they support arbitrarily large
VLEN. All implementations work for both RV32 and RV64 platforms, and
make no assumptions about page size.
---
 sysdeps/riscv/rvv/memchr.S  | 63 +++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memcmp.S  | 75 +++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memcpy.S  | 51 +++++++++++++++++++++++++
 sysdeps/riscv/rvv/memmove.S | 72 +++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/memset.S  | 51 +++++++++++++++++++++++++
 5 files changed, 312 insertions(+)
 create mode 100644 sysdeps/riscv/rvv/memchr.S
 create mode 100644 sysdeps/riscv/rvv/memcmp.S
 create mode 100644 sysdeps/riscv/rvv/memcpy.S
 create mode 100644 sysdeps/riscv/rvv/memmove.S
 create mode 100644 sysdeps/riscv/rvv/memset.S

diff --git a/sysdeps/riscv/rvv/memchr.S b/sysdeps/riscv/rvv/memchr.S
new file mode 100644
index 0000000000..6981a9f8b0
--- /dev/null
+++ b/sysdeps/riscv/rvv/memchr.S
@@ -0,0 +1,63 @@
+/* RVV versions memchr.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s, c, n); a0 also returns.  */
+#define src_ptr a0
+#define needle a1
+#define remaining a2
+#define result a0
+
+#define chunk_len a3
+#define match_idx a4
+
+#define LMUL_SETTING m8
+#define vec_data v0
+#define vec_mask v8
+
+ENTRY(memchr)
+
+L(loop):
+    /* Request up to `remaining` byte elements for this pass.  */
+    vsetvli zero, remaining, e8, LMUL_SETTING, ta, ma
+    /* Fault-only-first load: vl may come back smaller than requested.  */
+    vle8ff.v vec_data, (src_ptr)
+    vmseq.vx vec_mask, vec_data, needle
+    /* match_idx = index of the first match, or -1 if there is none.  */
+    vfirst.m match_idx, vec_mask
+    bgez match_idx, L(found)
+
+    /* No match: step past the bytes that were actually examined.  */
+    csrr chunk_len, vl
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    bnez remaining, L(loop)
+
+    /* Ran out of bytes without a match.  */
+    li result, 0
+    ret
+
+L(found):
+    add result, src_ptr, match_idx
+    ret
+
+END(memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/riscv/rvv/memcmp.S b/sysdeps/riscv/rvv/memcmp.S
new file mode 100644
index 0000000000..b156ec524c
--- /dev/null
+++ b/sysdeps/riscv/rvv/memcmp.S
@@ -0,0 +1,75 @@
+/* RVV versions memcmp.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s1, s2, n); a0 also returns.  */
+#define lhs_ptr a0
+#define rhs_ptr a1
+#define remaining a2
+#define result a0
+
+#define chunk_len a3
+#define diff_idx a4
+#define lhs_byte a5
+#define rhs_byte a6
+
+#define LMUL_SETTING m8
+#define vec_lhs v0
+#define vec_rhs v8
+#define vec_mask v16
+
+ENTRY(memcmp)
+
+L(loop):
+    /* chunk_len = number of bytes compared in this pass.  */
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_lhs, (lhs_ptr)
+    vle8.v vec_rhs, (rhs_ptr)
+
+    /* diff_idx = index of the first differing byte, or -1 if none.  */
+    vmsne.vv vec_mask, vec_lhs, vec_rhs
+    sub remaining, remaining, chunk_len
+    vfirst.m diff_idx, vec_mask
+    bgez diff_idx, L(found)
+
+    /* Chunks are equal: advance both inputs.  */
+    add rhs_ptr, rhs_ptr, chunk_len
+    add lhs_ptr, lhs_ptr, chunk_len
+    bnez remaining, L(loop)
+
+    li result, 0
+    ret
+
+L(found):
+    /* Return the difference of the two differing bytes (zero-extended).  */
+    add lhs_ptr, lhs_ptr, diff_idx
+    lbu lhs_byte, 0(lhs_ptr)
+    add rhs_ptr, rhs_ptr, diff_idx
+    lbu rhs_byte, 0(rhs_ptr)
+    sub result, lhs_byte, rhs_byte
+    ret
+
+END(memcmp)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
+strong_alias (memcmp, __memcmpeq)
+libc_hidden_def (__memcmpeq)
+
diff --git a/sysdeps/riscv/rvv/memcpy.S b/sysdeps/riscv/rvv/memcpy.S
new file mode 100644
index 0000000000..de790fbe51
--- /dev/null
+++ b/sysdeps/riscv/rvv/memcpy.S
@@ -0,0 +1,51 @@
+/* RVV versions memcpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (dest, src, n).  */
+#define dst_base a0
+#define src_ptr a1
+#define remaining a2
+
+#define chunk_len a3
+#define dst_ptr a4
+
+#define LMUL_SETTING m8
+#define vec_data v0
+
+ENTRY(memcpy)
+
+    /* Walk a copy of the destination so a0 still holds dest on return.  */
+    mv dst_ptr, dst_base
+
+L(loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_data, (src_ptr)
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_ptr)
+    add dst_ptr, dst_ptr, chunk_len
+    bnez remaining, L(loop)
+
+    ret
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/riscv/rvv/memmove.S b/sysdeps/riscv/rvv/memmove.S
new file mode 100644
index 0000000000..ed12744064
--- /dev/null
+++ b/sysdeps/riscv/rvv/memmove.S
@@ -0,0 +1,72 @@
+/* RVV versions memmove.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (dest, src, n).  */
+#define dst_base a0
+#define src_ptr a1
+#define remaining a2
+
+#define chunk_len a3
+#define dst_ptr a4
+#define src_end a5
+#define dst_end a6
+
+#define LMUL_SETTING m8
+#define vec_data v0
+
+ENTRY(memmove)
+
+    /* Walk a copy of the destination so a0 still holds dest on return.  */
+    mv dst_ptr, dst_base
+
+    /* src >= dest: every chunk is loaded before any store can reach it,
+       so ascending order is safe even when the regions overlap.  */
+    bgeu src_ptr, dst_base, L(forward_copy_loop)
+    add src_end, src_ptr, remaining
+    add dst_end, dst_base, remaining
+    /* src < dest < src + n: dest starts inside the source, so copy in
+       descending order.  Otherwise fall through to the forward loop.  */
+    bltu dst_base, src_end, L(backward_copy_loop)
+
+L(forward_copy_loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vle8.v vec_data, (src_ptr)
+    add src_ptr, src_ptr, chunk_len
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_ptr)
+    add dst_ptr, dst_ptr, chunk_len
+    bnez remaining, L(forward_copy_loop)
+    ret
+
+L(backward_copy_loop):
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    /* Step both end pointers down to the start of the current chunk.  */
+    sub src_end, src_end, chunk_len
+    sub dst_end, dst_end, chunk_len
+    vle8.v vec_data, (src_end)
+    sub remaining, remaining, chunk_len
+    vse8.v vec_data, (dst_end)
+    bnez remaining, L(backward_copy_loop)
+    ret
+
+END(memmove)
+libc_hidden_builtin_def (memmove)
diff --git a/sysdeps/riscv/rvv/memset.S b/sysdeps/riscv/rvv/memset.S
new file mode 100644
index 0000000000..3a6c3d0afd
--- /dev/null
+++ b/sysdeps/riscv/rvv/memset.S
@@ -0,0 +1,51 @@
+/* RVV versions memset.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Register aliases: a0-a2 are the arguments (s, c, n).  */
+#define dst_base a0
+#define fill_byte a1
+#define remaining a2
+#define chunk_len a3
+#define dst_ptr a5
+
+#define LMUL_SETTING m8
+#define vec_fill v0
+
+ENTRY(memset)
+
+    /* Walk a copy of the destination so a0 still holds s on return.  */
+    mv dst_ptr, dst_base
+
+    /* Splat the fill byte into the vector register group once, up front.  */
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    vmv.v.x vec_fill, fill_byte
+
+L(loop):
+    vse8.v vec_fill, (dst_ptr)
+    sub remaining, remaining, chunk_len
+    add dst_ptr, dst_ptr, chunk_len
+    vsetvli chunk_len, remaining, e8, LMUL_SETTING, ta, ma
+    bnez remaining, L(loop)
+    ret
+
+END(memset)
+libc_hidden_builtin_def (memset)
-- 
2.37.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-04-21 12:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-21  7:29 [PATCH v2 2/5] riscv: vectorized mem* functions Hau Hsu
2023-04-21  7:54 [PATCH v2 0/5] riscv: Vectorized mem*/str* function Hau Hsu
2023-04-21  7:54 ` [PATCH v2 2/5] riscv: vectorized mem* functions Hau Hsu
2023-04-21 12:12   ` Adhemerval Zanella Netto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).