public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin] Cygwin: x86_64: import latest NetBSD bcopy.S
@ 2022-12-20 21:54 Corinna Vinschen
0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2022-12-20 21:54 UTC (permalink / raw)
To: cygwin-cvs
https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=43743ed754727a0ab5bbe9b15068d3256791f011
commit 43743ed754727a0ab5bbe9b15068d3256791f011
Author: Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Tue Dec 20 10:13:38 2022 +0100
Commit: Corinna Vinschen <corinna@vinschen.de>
CommitDate: Tue Dec 20 10:13:59 2022 +0100
Cygwin: x86_64: import latest NetBSD bcopy.S
Tweak slightly to allow implementing entire {w}mem{p}{cpy,move}
family:
Add WIDE macro processing for wmem* and POST macro processing for
memp* functions.
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diff:
---
winsup/cygwin/Makefile.am | 8 +-
winsup/cygwin/x86_64/bcopy.S | 192 ++++++++++++++++++++++++++++++++++++++++
winsup/cygwin/x86_64/memcpy.S | 133 +---------------------------
winsup/cygwin/x86_64/memmove.S | 4 +
winsup/cygwin/x86_64/mempcpy.S | 5 ++
winsup/cygwin/x86_64/wmemcpy.S | 5 ++
winsup/cygwin/x86_64/wmemmove.S | 5 ++
winsup/cygwin/x86_64/wmempcpy.S | 6 ++
8 files changed, 227 insertions(+), 131 deletions(-)
diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am
index f63e8959141b..f8c249f527e7 100644
--- a/winsup/cygwin/Makefile.am
+++ b/winsup/cygwin/Makefile.am
@@ -52,9 +52,15 @@ TEST_LIB_NAME=libcygwin0.a
# These objects are included directly into the import library
if TARGET_X86_64
TARGET_FILES= \
+ x86_64/bcopy.S \
x86_64/memchr.S \
x86_64/memcpy.S \
- x86_64/memset.S
+ x86_64/memmove.S \
+ x86_64/mempcpy.S \
+ x86_64/memset.S \
+ x86_64/wmemcpy.S \
+ x86_64/wmemmove.S \
+ x86_64/wmempcpy.S
endif
LIB_FILES= \
diff --git a/winsup/cygwin/x86_64/bcopy.S b/winsup/cygwin/x86_64/bcopy.S
new file mode 100644
index 000000000000..84dba1223e25
--- /dev/null
+++ b/winsup/cygwin/x86_64/bcopy.S
@@ -0,0 +1,192 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS)
+ RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
+#endif
+
+ /*
+ * (ov)bcopy (src,dst,cnt)
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ *
+ * Hacked about by dsl@netbsd.org
+ */
+
+#ifdef MEMCOPY /* memcpy family: the overlap handling below is compiled out */
+#ifdef WIDE /* wmem* variants: count arrives in wchar_t units */
+#ifdef POST /* memp* variants: return dst + n instead of dst */
+ENTRY3(wmempcpy) /* ENTRY3 presumably comes from <machine/asm.h>; not visible here */
+#else
+ENTRY3(wmemcpy)
+#endif
+#else
+#ifdef POST
+ENTRY3(mempcpy)
+#else
+ENTRY3(memcpy)
+#endif
+#endif
+#define NO_OVERLAP /* disables every "#if !defined(NO_OVERLAP)" section below */
+#else
+#ifdef MEMMOVE
+#ifdef WIDE
+ENTRY3(wmemmove)
+#else
+ENTRY3(memmove)
+#endif
+#else
+ENTRY3(bcopy) /* default when neither MEMCOPY nor MEMMOVE is defined */
+#endif
+#endif
+#ifdef WIDE
+ shlq $1,%rdx /* cnt * sizeof (wchar_t): doubles the count, i.e. 2-byte wchar_t */
+#endif
+ movq %rdx,%rcx /* rcx = byte count; rdx keeps a copy */
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ movq %rdi,%rax /* must return destination address */
+#ifdef POST
+ addq %rdx,%rax /* + n: memp* variants return dst + byte count */
+#endif
+ mov %rdi,%r11 /* r11 = dst, for misaligned check */
+#else
+ mov %rsi,%r11 /* r11 = dst (second bcopy arg), for misaligned check */
+ xchgq %rdi,%rsi /* bcopy() has arg order reversed */
+#endif
+
+#if !defined(NO_OVERLAP)
+ movq %rdi,%r8 /* r8 = dst - src, compared unsigned in the overlap tests */
+ subq %rsi,%r8
+#endif
+
+ shrq $3,%rcx /* rcx = cnt / 8: count for copy by 8-byte words */
+ jz 8f /* j if less than 8 bytes */
+
+ lea -8(%rdi,%rdx),%r9 /* r9 = dst + cnt - 8: target address of last 8 */
+ mov -8(%rsi,%rdx),%r10 /* r10 = last 8 source bytes, read before any store */
+#if !defined(NO_OVERLAP)
+ cmpq %rdx,%r8 /* overlapping? i.e. (dst - src) < cnt, unsigned */
+ jb 10f /* yes: dst starts inside the source, copy backwards */
+#endif
+
+/*
+ * Non-overlapping, copy forwards.
+ * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
+ * if %ecx is more than 76.
+ * AMD might do something similar some day.
+ */
+ and $7,%r11 /* destination misaligned ? */
+ jnz 2f /* yes: align the destination first */
+ rep
+ movsq /* copy rcx words upwards; no cld here, assumes DF is clear on entry */
+ mov %r10,(%r9) /* write last word: covers the cnt % 8 tail bytes */
+ ret /* NOTE(review): rsi/rdi are modified and not restored in this file; confirm ENTRY3/asm.h handles that */
+
+/*
+ * Destination misaligned
+ * AMD say it is better to align the destination (not the source).
+ * This will also re-align copies if the source and dest are both
+ * misaligned by the same amount.
+ * (I think Nehalem will use its accelerated copy if the source
+ * and destination have the same alignment.)
+ */
+2:
+ lea -9(%r11,%rdx),%rcx /* (dst & 7) + cnt - 9: post re-alignment count, still in bytes */
+ neg %r11 /* now -1 .. -7 */
+ mov (%rsi),%rdx /* get first word (rdx no longer holds cnt) */
+ mov %rdi,%r8 /* target for first word */
+ lea 8(%rsi,%r11),%rsi /* advance src by 8 - (dst & 7) */
+ lea 8(%rdi,%r11),%rdi /* advance dst likewise: now 8-byte aligned */
+ shr $3,%rcx /* whole words to copy; leaves 1..8 bytes for the last word */
+ rep
+ movsq /* aligned forward copy of the middle part */
+ mov %rdx,(%r8) /* write first word */
+ mov %r10,(%r9) /* write last word */
+ ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards.
+ * Reverse copy is probably easy to code faster than 'rep movsq'
+ * since that requires (IIRC) an extra clock every 3 iterations (AMD).
+ * However I don't suppose anything cares that much!
+ * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
+ * The copy is aligned with the buffer start (more likely to
+ * be a multiple of 8 than the end).
+ */
+10:
+ lea -8(%rsi,%rcx,8),%rsi /* src + 8 * rcx - 8: last whole word counted from the start */
+ lea -8(%rdi,%rcx,8),%rdi /* same position in the destination */
+ std /* DF = 1: string ops step downwards */
+ rep
+ movsq /* copy rcx words, highest address first */
+ cld /* back to DF = 0 */
+ mov %r10,(%r9) /* write last bytes */
+ ret
+#endif
+
+/* Less than 8 bytes to copy, copy by bytes */
+/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
+ * For longer transfers it is 50+ !
+ */
+8: mov %rdx,%rcx /* rcx = byte count again (0 .. 7 here) */
+
+#if !defined(NO_OVERLAP)
+ cmpq %rdx,%r8 /* overlapping? same unsigned test as above */
+ jb 81f
+#endif
+
+ /* nope, copy forwards. */
+ rep
+ movsb /* copies nothing when rcx is 0 */
+ ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards */
+81:
+ lea -1(%rsi,%rcx),%rsi /* point at the last source byte */
+ lea -1(%rdi,%rcx),%rdi /* point at the last destination byte */
+ std /* DF = 1: copy downwards */
+ rep
+ movsb
+ cld /* back to DF = 0 */
+ ret
+#endif
+
+#ifdef MEMCOPY
+END(memcpy) /* NOTE(review): also closes mempcpy/wmemcpy/wmempcpy; fine only if END ignores its name - confirm */
+#else
+#ifdef MEMMOVE
+END(memmove) /* NOTE(review): also closes wmemmove; same caveat as above */
+#else
+END(bcopy)
+#endif
+#endif
diff --git a/winsup/cygwin/x86_64/memcpy.S b/winsup/cygwin/x86_64/memcpy.S
index 4be7a01459f9..a53243b5ff26 100644
--- a/winsup/cygwin/x86_64/memcpy.S
+++ b/winsup/cygwin/x86_64/memcpy.S
@@ -1,131 +1,4 @@
-/* These functions are almost verbatim FreeBSD code (even if the header of
- one file mentiones NetBSD), just wrapped in the minimum required code to
- make them work under the MS AMD64 ABI.
- See FreeBSD src/lib/libc/amd64/string/bcopy.S */
+/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from locore.s.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
- .seh_proc _memcpy
-_memcpy:
- movq %rsi,8(%rsp)
- movq %rdi,16(%rsp)
- .seh_endprologue
- movq %rcx,%rdi
- movq %rdx,%rsi
- movq %r8,%rdx
-
- movq %rdx,%rcx
- movq %rdi,%r8
- subq %rsi,%r8
- cmpq %rcx,%r8 /* overlapping? */
- jb 1f
- cld /* nope, copy forwards. */
- shrq $3,%rcx /* copy by words */
- rep movsq
- movq %rdx,%rcx
- andq $7,%rcx /* any bytes left? */
- rep movsb
- jmp 2f
-1:
- addq %rcx,%rdi /* copy backwards. */
- addq %rcx,%rsi
- std
- andq $7,%rcx /* any fractional bytes? */
- decq %rdi
- decq %rsi
- rep movsb
- movq %rdx,%rcx /* copy remainder by words */
- shrq $3,%rcx
- subq $7,%rsi
- subq $7,%rdi
- rep movsq
- cld
-2:
- movq 8(%rsp),%rsi
- movq 16(%rsp),%rdi
- ret
- .seh_endproc
-
- .globl memmove
- .seh_proc memmove
-memmove:
- .seh_endprologue
- movq %rcx,%rax /* return dst */
- jmp _memcpy
- .seh_endproc
-
- .globl memcpy
- .seh_proc memcpy
-memcpy:
- .seh_endprologue
- movq %rcx,%rax /* return dst */
- jmp _memcpy
- .seh_endproc
-
- .globl mempcpy
- .seh_proc mempcpy
-mempcpy:
- .seh_endprologue
- movq %rcx,%rax /* return dst */
- addq %r8,%rax /* + n */
- jmp _memcpy
- .seh_endproc
-
- .globl wmemmove
- .seh_proc wmemmove
-wmemmove:
- .seh_endprologue
- shlq $1,%r8 /* cnt * sizeof (wchar_t) */
- movq %rcx,%rax /* return dst */
- jmp _memcpy
- .seh_endproc
-
- .globl wmemcpy
- .seh_proc wmemcpy
-wmemcpy:
- .seh_endprologue
- shlq $1,%r8 /* cnt * sizeof (wchar_t) */
- movq %rcx,%rax /* return dst */
- jmp _memcpy
- .seh_endproc
-
- .globl wmempcpy
- .seh_proc wmempcpy
-wmempcpy:
- .seh_endprologue
- shlq $1,%r8 /* cnt * sizeof (wchar_t) */
- movq %rcx,%rax /* return dst */
- addq %r8,%rax /* + n */
- jmp _memcpy
- .seh_endproc
+#define MEMCOPY
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/memmove.S b/winsup/cygwin/x86_64/memmove.S
new file mode 100644
index 000000000000..f4b7b08257fa
--- /dev/null
+++ b/winsup/cygwin/x86_64/memmove.S
@@ -0,0 +1,4 @@
+/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
+
+#define MEMMOVE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/mempcpy.S b/winsup/cygwin/x86_64/mempcpy.S
new file mode 100644
index 000000000000..7ccb4f1d9c12
--- /dev/null
+++ b/winsup/cygwin/x86_64/mempcpy.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
+
+#define MEMCOPY
+#define POST
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemcpy.S b/winsup/cygwin/x86_64/wmemcpy.S
new file mode 100644
index 000000000000..c998ecac58b5
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemcpy.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
+
+#define MEMCOPY
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemmove.S b/winsup/cygwin/x86_64/wmemmove.S
new file mode 100644
index 000000000000..e7ee8efeb693
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemmove.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
+
+#define MEMMOVE
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmempcpy.S b/winsup/cygwin/x86_64/wmempcpy.S
new file mode 100644
index 000000000000..53f4ceb2e89e
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmempcpy.S
@@ -0,0 +1,6 @@
+/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
+
+#define MEMCOPY
+#define WIDE
+#define POST
+#include "bcopy.S"
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-12-20 21:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-20 21:54 [newlib-cygwin] Cygwin: x86_64: import latest NetBSD bcopy.S Corinna Vinschen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).