public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Szabolcs Nagy <nsz@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc/arm/morello/main] aarch64: morello: string: memset
Date: Wed, 23 Nov 2022 14:42:44 +0000 (GMT)	[thread overview]
Message-ID: <20221123144244.3EDBA3852C58@sourceware.org> (raw)

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=8d8d807e96a5b0fdd1147f19d43b0959fa75e8e4

commit 8d8d807e96a5b0fdd1147f19d43b0959fa75e8e4
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Tue Apr 26 08:19:43 2022 +0100

    aarch64: morello: string: memset
    
    memset from arm optimized-routines morello branch.

Diff:
---
 sysdeps/aarch64/morello/memset.S | 154 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)

diff --git a/sysdeps/aarch64/morello/memset.S b/sysdeps/aarch64/morello/memset.S
new file mode 100644
index 0000000000..db65050421
--- /dev/null
+++ b/sysdeps/aarch64/morello/memset.S
@@ -0,0 +1,154 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#ifndef MEMSET
+# define MEMSET memset
+#endif
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, Morello, Advanced SIMD, unaligned accesses.
+ *
+ */
+
+#if defined(__CHERI_PURE_CAPABILITY__)
+#define dstin	c0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	c3
+#define xdst	x3
+#define dstend	c4
+#define xdstend	x4
+#define zva_val	x5
+#else
+#define dstin	x0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define xdst	x3
+#define dstend	x4
+#define xdstend	x4
+#define zva_val	x5
+#endif
+
+ENTRY (MEMSET)
+	PTR_ARG (0)
+	SIZE_ARG (2)
+
+	dup	v0.16B, valw
+	add	dstend, dstin, count
+
+	cmp	count, 96
+	b.hi	L(set_long)
+	cmp	count, 16
+	b.hs	L(set_medium)
+	mov	val, v0.D[0]
+
+	/* Set 0..15 bytes.  */
+	tbz	count, 3, 1f
+	str	val, [dstin]
+	str	val, [dstend, -8]
+	ret
+	.p2align 4
+1:	tbz	count, 2, 2f
+	str	valw, [dstin]
+	str	valw, [dstend, -4]
+	ret
+2:	cbz	count, 3f
+	strb	valw, [dstin]
+	tbz	count, 1, 3f
+	strh	valw, [dstend, -2]
+3:	ret
+
+	/* Set 17..96 bytes.  */
+L(set_medium):
+	str	q0, [dstin]
+	tbnz	count, 6, L(set96)
+	str	q0, [dstend, -16]
+	tbz	count, 5, 1f
+	str	q0, [dstin, 16]
+	str	q0, [dstend, -32]
+1:	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	str	q0, [dstin, 16]
+	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 4
+L(set_long):
+	and	valw, valw, 255
+#if defined(__CHERI_PURE_CAPABILITY__)
+	alignd	dst, dstin, 4
+#else
+	bic	dst, dstin, 15
+#endif
+	str	q0, [dstin]
+	cmp	count, 160
+	ccmp	valw, 0, 0, hs
+	b.ne	L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(no_zva)
+#endif
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+#if defined(__CHERI_PURE_CAPABILITY__)
+	alignd	dst, dst, 6
+#else
+	bic	dst, dst, 63
+#endif
+	sub	count, xdstend, xdst	/* Count is now 64 too large.  */
+	sub	count, count, 128	/* Adjust count and bias for loop.  */
+
+	.p2align 4
+L(zva_loop):
+	add	dst, dst, 64
+	dc	zva, dst
+	subs	count, count, 64
+	b.hi	L(zva_loop)
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+L(no_zva):
+	sub	count, xdstend, xdst	/* Count is 16 too large.  */
+	sub	dst, dst, 16		/* Dst is biased by -32.  */
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+L(no_zva_loop):
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dst, 64]!
+	subs	count, count, 64
+	b.hi	L(no_zva_loop)
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)

             reply	other threads:[~2022-11-23 14:42 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-23 14:42 Szabolcs Nagy [this message]
  -- strict thread matches above, loose matches on Subject: below --
2022-10-27 13:52 Szabolcs Nagy
2022-10-26 15:14 Szabolcs Nagy
2022-08-05 19:32 Szabolcs Nagy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221123144244.3EDBA3852C58@sourceware.org \
    --to=nsz@sourceware.org \
    --cc=glibc-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).