public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin] Cygwin: x86_64: import latest NetBSD bcopy.S
@ 2022-12-20 21:54 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2022-12-20 21:54 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=43743ed754727a0ab5bbe9b15068d3256791f011

commit 43743ed754727a0ab5bbe9b15068d3256791f011
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Tue Dec 20 10:13:38 2022 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Tue Dec 20 10:13:59 2022 +0100

    Cygwin: x86_64: import latest NetBSD bcopy.S
    
    Tweak slightly to allow implementing entire {w}mem{p}{cpy,move}
    family:
    
    Add WIDE macro processing for wmem* and POST macro processing for
    memp* functions.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/Makefile.am       |   8 +-
 winsup/cygwin/x86_64/bcopy.S    | 192 ++++++++++++++++++++++++++++++++++++++++
 winsup/cygwin/x86_64/memcpy.S   | 133 +---------------------------
 winsup/cygwin/x86_64/memmove.S  |   4 +
 winsup/cygwin/x86_64/mempcpy.S  |   5 ++
 winsup/cygwin/x86_64/wmemcpy.S  |   5 ++
 winsup/cygwin/x86_64/wmemmove.S |   5 ++
 winsup/cygwin/x86_64/wmempcpy.S |   6 ++
 8 files changed, 227 insertions(+), 131 deletions(-)

diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am
index f63e8959141b..f8c249f527e7 100644
--- a/winsup/cygwin/Makefile.am
+++ b/winsup/cygwin/Makefile.am
@@ -52,9 +52,15 @@ TEST_LIB_NAME=libcygwin0.a
 # These objects are included directly into the import library
 if TARGET_X86_64
 TARGET_FILES= \
+	x86_64/bcopy.S \
 	x86_64/memchr.S \
 	x86_64/memcpy.S \
-	x86_64/memset.S
+	x86_64/memmove.S \
+	x86_64/mempcpy.S \
+	x86_64/memset.S \
+	x86_64/wmemcpy.S \
+	x86_64/wmemmove.S \
+	x86_64/wmempcpy.S
 endif
 
 LIB_FILES= \
diff --git a/winsup/cygwin/x86_64/bcopy.S b/winsup/cygwin/x86_64/bcopy.S
new file mode 100644
index 000000000000..84dba1223e25
--- /dev/null
+++ b/winsup/cygwin/x86_64/bcopy.S
@@ -0,0 +1,192 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS)
+	RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
+#endif
+
+	/*
+	 * (ov)bcopy (src,dst,cnt)
+	 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+	 *
+	 * Hacked about by dsl@netbsd.org
+	 */
+
+#ifdef MEMCOPY
+#ifdef WIDE
+#ifdef POST
+ENTRY3(wmempcpy)
+#else
+ENTRY3(wmemcpy)
+#endif
+#else
+#ifdef POST
+ENTRY3(mempcpy)
+#else
+ENTRY3(memcpy)
+#endif
+#endif
+#define NO_OVERLAP
+#else
+#ifdef MEMMOVE
+#ifdef WIDE
+ENTRY3(wmemmove)
+#else
+ENTRY3(memmove)
+#endif
+#else
+ENTRY3(bcopy)
+#endif
+#endif
+#ifdef WIDE
+	shlq	$1,%rdx		/* cnt * sizeof (wchar_t) */
+#endif
+	movq	%rdx,%rcx
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	movq	%rdi,%rax	/* must return destination address */
+#ifdef POST
+	addq	%rdx,%rax	/* + n */
+#endif
+	mov	%rdi,%r11	/* for misaligned check */
+#else
+	mov	%rsi,%r11	/* for misaligned check */
+	xchgq	%rdi,%rsi	/* bcopy() has arg order reversed */
+#endif
+
+#if !defined(NO_OVERLAP)
+	movq	%rdi,%r8
+	subq	%rsi,%r8
+#endif
+
+	shrq	$3,%rcx		/* count for copy by words */
+	jz	8f		/* j if less than 8 bytes */
+
+	lea	-8(%rdi,%rdx),%r9	/* target address of last 8 */
+	mov	-8(%rsi,%rdx),%r10	/* get last word */
+#if !defined(NO_OVERLAP)
+	cmpq	%rdx,%r8	/* overlapping? */
+	jb	10f
+#endif
+
+/*
+ * Non-overlapping, copy forwards.
+ * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
+ * if %ecx is more than 76.
+ * AMD might do something similar some day.
+ */
+	and	$7,%r11		/* destination misaligned ? */
+	jnz	2f
+	rep
+	movsq
+	mov	%r10,(%r9)	/* write last word */
+	ret
+
+/*
+ * Destination misaligned
+ * AMD say it is better to align the destination (not the source).
+ * This will also re-align copies if the source and dest are both
+ * misaligned by the same amount.
+ * (I think Nehalem will use its accelerated copy if the source
+ * and destination have the same alignment.)
+ */
+2:
+	lea	-9(%r11,%rdx),%rcx	/* post re-alignment count */
+	neg	%r11			/* now -1 .. -7 */
+	mov	(%rsi),%rdx		/* get first word */
+	mov	%rdi,%r8		/* target for first word */
+	lea	8(%rsi,%r11),%rsi
+	lea	8(%rdi,%r11),%rdi
+	shr	$3,%rcx
+	rep
+	movsq
+	mov	%rdx,(%r8)		/* write first word */
+	mov	%r10,(%r9)		/* write last word */
+	ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards.
+ * Reverse copy is probably easy to code faster than 'rep movsq'
+ * since that requires (IIRC) an extra clock every 3 iterations (AMD).
+ * However I don't suppose anything cares that much!
+ * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
+ * The copy is aligned with the buffer start (more likely to
+ * be a multiple of 8 than the end).
+ */
+10:
+	lea	-8(%rsi,%rcx,8),%rsi
+	lea	-8(%rdi,%rcx,8),%rdi
+	std
+	rep
+	movsq
+	cld
+	mov	%r10,(%r9)	/* write last bytes */
+	ret
+#endif
+
+/* Less than 8 bytes to copy, copy by bytes */
+/* Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
+ * For longer transfers it is 50+ !
+ */
+8:	mov	%rdx,%rcx
+
+#if !defined(NO_OVERLAP)
+	cmpq	%rdx,%r8	/* overlapping? */
+	jb	81f
+#endif
+
+	/* nope, copy forwards. */
+	rep
+	movsb
+	ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards */
+81:
+	lea	-1(%rsi,%rcx),%rsi
+	lea	-1(%rdi,%rcx),%rdi
+	std
+	rep
+	movsb
+	cld
+	ret
+#endif
+
+#ifdef MEMCOPY
+END(memcpy)
+#else
+#ifdef MEMMOVE
+END(memmove)
+#else
+END(bcopy)
+#endif
+#endif
diff --git a/winsup/cygwin/x86_64/memcpy.S b/winsup/cygwin/x86_64/memcpy.S
index 4be7a01459f9..a53243b5ff26 100644
--- a/winsup/cygwin/x86_64/memcpy.S
+++ b/winsup/cygwin/x86_64/memcpy.S
@@ -1,131 +1,4 @@
-/* These functions are almost verbatim FreeBSD code (even if the header of
-   one file mentiones NetBSD), just wrapped in the minimum required code to
-   make them work under the MS AMD64 ABI.
-   See FreeBSD src/lib/libc/amd64/string/bcopy.S */
+/*	$NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
 
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from locore.s.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of the University nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
-	.seh_proc _memcpy
-_memcpy:
-	movq	%rsi,8(%rsp)
-	movq	%rdi,16(%rsp)
-	.seh_endprologue
-	movq	%rcx,%rdi
-	movq	%rdx,%rsi
-	movq	%r8,%rdx
-
-	movq    %rdx,%rcx
-	movq    %rdi,%r8
-	subq    %rsi,%r8
-	cmpq    %rcx,%r8	/* overlapping? */
-	jb      1f
-	cld                     /* nope, copy forwards. */
-	shrq    $3,%rcx		/* copy by words */
-	rep movsq
-	movq    %rdx,%rcx
-	andq    $7,%rcx		/* any bytes left? */
-	rep movsb
-	jmp	2f
-1:
-	addq    %rcx,%rdi	/* copy backwards. */
-	addq    %rcx,%rsi
-	std
-	andq    $7,%rcx		/* any fractional bytes? */
-	decq    %rdi
-	decq    %rsi
-	rep movsb
-	movq    %rdx,%rcx	/* copy remainder by words */
-	shrq    $3,%rcx
-	subq    $7,%rsi
-	subq    $7,%rdi
-	rep movsq
-	cld
-2:
-	movq	8(%rsp),%rsi
-	movq	16(%rsp),%rdi
-	ret
-	.seh_endproc
-
-	.globl  memmove
-	.seh_proc memmove
-memmove:
-	.seh_endprologue
-	movq	%rcx,%rax	/* return dst */
-	jmp	_memcpy
-	.seh_endproc
-
-	.globl  memcpy
-	.seh_proc memcpy
-memcpy:
-	.seh_endprologue
-	movq	%rcx,%rax	/* return dst */
-	jmp	_memcpy
-	.seh_endproc
-
-	.globl  mempcpy
-	.seh_proc mempcpy
-mempcpy:
-	.seh_endprologue
-	movq	%rcx,%rax	/* return dst  */
-	addq	%r8,%rax	/*         + n */
-	jmp	_memcpy
-	.seh_endproc
-
-	.globl  wmemmove
-	.seh_proc wmemmove
-wmemmove:
-	.seh_endprologue
-	shlq	$1,%r8		/* cnt * sizeof (wchar_t) */
-	movq	%rcx,%rax	/* return dst */
-	jmp	_memcpy
-	.seh_endproc
-
-	.globl  wmemcpy
-	.seh_proc wmemcpy
-wmemcpy:
-	.seh_endprologue
-	shlq	$1,%r8		/* cnt * sizeof (wchar_t) */
-	movq	%rcx,%rax	/* return dst */
-	jmp	_memcpy
-	.seh_endproc
-
-	.globl  wmempcpy
-	.seh_proc wmempcpy
-wmempcpy:
-	.seh_endprologue
-	shlq	$1,%r8		/* cnt * sizeof (wchar_t) */
-	movq	%rcx,%rax	/* return dst */
-	addq	%r8,%rax	/*         + n */
-	jmp	_memcpy
-	.seh_endproc
+#define MEMCOPY
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/memmove.S b/winsup/cygwin/x86_64/memmove.S
new file mode 100644
index 000000000000..f4b7b08257fa
--- /dev/null
+++ b/winsup/cygwin/x86_64/memmove.S
@@ -0,0 +1,4 @@
+/*	$NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
+
+#define MEMMOVE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/mempcpy.S b/winsup/cygwin/x86_64/mempcpy.S
new file mode 100644
index 000000000000..7ccb4f1d9c12
--- /dev/null
+++ b/winsup/cygwin/x86_64/mempcpy.S
@@ -0,0 +1,5 @@
+/*	$NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
+
+#define MEMCOPY
+#define POST
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemcpy.S b/winsup/cygwin/x86_64/wmemcpy.S
new file mode 100644
index 000000000000..c998ecac58b5
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemcpy.S
@@ -0,0 +1,5 @@
+/*	$NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
+
+#define MEMCOPY
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemmove.S b/winsup/cygwin/x86_64/wmemmove.S
new file mode 100644
index 000000000000..e7ee8efeb693
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemmove.S
@@ -0,0 +1,5 @@
+/*	$NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
+
+#define MEMMOVE
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmempcpy.S b/winsup/cygwin/x86_64/wmempcpy.S
new file mode 100644
index 000000000000..53f4ceb2e89e
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmempcpy.S
@@ -0,0 +1,6 @@
+/*	$NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $	*/
+
+#define MEMCOPY
+#define WIDE
+#define POST
+#include "bcopy.S"

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-12-20 21:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-20 21:54 [newlib-cygwin] Cygwin: x86_64: import latest NetBSD bcopy.S Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).