From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 0CC993858031; Tue, 20 Dec 2022 21:54:57 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0CC993858031 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1671573297; bh=n86NR4A2IQd9aGQZscX2Z8X/atsfyw7fC+Flwg19mKE=; h=From:To:Subject:Date:From; b=VYZe3/FQK6uetRNeLRHsxviXYlHh6D5i/BTlg8Hl2idf80na+OLc6yeutf73YTi2m MDoHFzijnfmE7l4WNJ4xF+FN+V4UbjJ0mYTs5kKU726Qm3IlwkcARuVZph9EZKrWpx fuj+XVKQ+XQUqAQhRTJXRZGY2k9VZh+jusNbKyeA= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin] Cygwin: x86_64: import latest NetBSD bcopy.S X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/master X-Git-Oldrev: b09617a828e8b1a372847d52673171d78b7dede1 X-Git-Newrev: 43743ed754727a0ab5bbe9b15068d3256791f011 Message-Id: <20221220215457.0CC993858031@sourceware.org> Date: Tue, 20 Dec 2022 21:54:57 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=43743ed754727a0ab5bbe9b15068d3256791f011 commit 43743ed754727a0ab5bbe9b15068d3256791f011 Author: Corinna Vinschen AuthorDate: Tue Dec 20 10:13:38 2022 +0100 Commit: Corinna Vinschen CommitDate: Tue Dec 20 10:13:59 2022 +0100 Cygwin: x86_64: import latest NetBSD bcopy.S Tweak slightly to allow implementing entire {w}mem{p}{cpy,move} family: Add WIDE macro processing for wmem* and POST macro processing for memp* functions. 
Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/Makefile.am | 8 +- winsup/cygwin/x86_64/bcopy.S | 192 ++++++++++++++++++++++++++++++++++++++++ winsup/cygwin/x86_64/memcpy.S | 133 +--------------------------- winsup/cygwin/x86_64/memmove.S | 4 + winsup/cygwin/x86_64/mempcpy.S | 5 ++ winsup/cygwin/x86_64/wmemcpy.S | 5 ++ winsup/cygwin/x86_64/wmemmove.S | 5 ++ winsup/cygwin/x86_64/wmempcpy.S | 6 ++ 8 files changed, 227 insertions(+), 131 deletions(-) diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am index f63e8959141b..f8c249f527e7 100644 --- a/winsup/cygwin/Makefile.am +++ b/winsup/cygwin/Makefile.am @@ -52,9 +52,15 @@ TEST_LIB_NAME=libcygwin0.a # These objects are included directly into the import library if TARGET_X86_64 TARGET_FILES= \ + x86_64/bcopy.S \ x86_64/memchr.S \ x86_64/memcpy.S \ - x86_64/memset.S + x86_64/memmove.S \ + x86_64/mempcpy.S \ + x86_64/memset.S \ + x86_64/wmemcpy.S \ + x86_64/wmemmove.S \ + x86_64/wmempcpy.S endif LIB_FILES= \ diff --git a/winsup/cygwin/x86_64/bcopy.S b/winsup/cygwin/x86_64/bcopy.S new file mode 100644 index 000000000000..84dba1223e25 --- /dev/null +++ b/winsup/cygwin/x86_64/bcopy.S @@ -0,0 +1,192 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from locore.s. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#if defined(LIBC_SCCS) + RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $") +#endif + + /* + * (ov)bcopy (src,dst,cnt) + * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 + * + * Hacked about by dsl@netbsd.org + */ + +#ifdef MEMCOPY +#ifdef WIDE +#ifdef POST +ENTRY3(wmempcpy) +#else +ENTRY3(wmemcpy) +#endif +#else +#ifdef POST +ENTRY3(mempcpy) +#else +ENTRY3(memcpy) +#endif +#endif +#define NO_OVERLAP +#else +#ifdef MEMMOVE +#ifdef WIDE +ENTRY3(wmemmove) +#else +ENTRY3(memmove) +#endif +#else +ENTRY3(bcopy) +#endif +#endif +#ifdef WIDE + shlq $1,%rdx /* cnt * sizeof (wchar_t) */ +#endif + movq %rdx,%rcx +#if defined(MEMCOPY) || defined(MEMMOVE) + movq %rdi,%rax /* must return destination address */ +#ifdef POST + addq %rdx,%rax /* + n */ +#endif + mov %rdi,%r11 /* for misaligned check */ +#else + mov %rsi,%r11 /* for misaligned check */ + xchgq %rdi,%rsi /* bcopy() has arg order reversed */ +#endif + +#if !defined(NO_OVERLAP) + movq %rdi,%r8 + subq 
%rsi,%r8 +#endif + + shrq $3,%rcx /* count for copy by words */ + jz 8f /* j if less than 8 bytes */ + + lea -8(%rdi,%rdx),%r9 /* target address of last 8 */ + mov -8(%rsi,%rdx),%r10 /* get last word */ +#if !defined(NO_OVERLAP) + cmpq %rdx,%r8 /* overlapping? */ + jb 10f +#endif + +/* + * Non-overlaping, copy forwards. + * Newer Intel cpus (Nehalem) will do 16byte read/write transfers + * if %ecx is more than 76. + * AMD might do something similar some day. + */ + and $7,%r11 /* destination misaligned ? */ + jnz 2f + rep + movsq + mov %r10,(%r9) /* write last word */ + ret + +/* + * Destination misaligned + * AMD say it is better to align the destination (not the source). + * This will also re-align copies if the source and dest are both + * misaligned by the same amount) + * (I think Nehalem will use its accelerated copy if the source + * and destination have the same alignment.) + */ +2: + lea -9(%r11,%rdx),%rcx /* post re-alignment count */ + neg %r11 /* now -1 .. -7 */ + mov (%rsi),%rdx /* get first word */ + mov %rdi,%r8 /* target for first word */ + lea 8(%rsi,%r11),%rsi + lea 8(%rdi,%r11),%rdi + shr $3,%rcx + rep + movsq + mov %rdx,(%r8) /* write first word */ + mov %r10,(%r9) /* write last word */ + ret + +#if !defined(NO_OVERLAP) +/* Must copy backwards. + * Reverse copy is probably easy to code faster than 'rep movds' + * since that requires (IIRC) an extra clock every 3 iterations (AMD). + * However I don't suppose anything cares that much! + * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4. + * The copy is aligned with the buffer start (more likely to + * be a multiple of 8 than the end). + */ +10: + lea -8(%rsi,%rcx,8),%rsi + lea -8(%rdi,%rcx,8),%rdi + std + rep + movsq + cld + mov %r10,(%r9) /* write last bytes */ + ret +#endif + +/* Less than 8 bytes to copy, copy by bytes */ +/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks). + * For longer transfers it is 50+ ! 
+ */ +8: mov %rdx,%rcx + +#if !defined(NO_OVERLAP) + cmpq %rdx,%r8 /* overlapping? */ + jb 81f +#endif + + /* nope, copy forwards. */ + rep + movsb + ret + +#if !defined(NO_OVERLAP) +/* Must copy backwards */ +81: + lea -1(%rsi,%rcx),%rsi + lea -1(%rdi,%rcx),%rdi + std + rep + movsb + cld + ret +#endif + +#ifdef MEMCOPY +END(memcpy) +#else +#ifdef MEMMOVE +END(memmove) +#else +END(bcopy) +#endif +#endif diff --git a/winsup/cygwin/x86_64/memcpy.S b/winsup/cygwin/x86_64/memcpy.S index 4be7a01459f9..a53243b5ff26 100644 --- a/winsup/cygwin/x86_64/memcpy.S +++ b/winsup/cygwin/x86_64/memcpy.S @@ -1,131 +1,4 @@ -/* These functions are almost verbatim FreeBSD code (even if the header of - one file mentiones NetBSD), just wrapped in the minimum required code to - make them work under the MS AMD64 ABI. - See FreeBSD src/lib/libc/amd64/string/bcopy.S */ +/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ =20 -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from locore.s. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - */ - - .seh_proc _memcpy -_memcpy: - movq %rsi,8(%rsp) - movq %rdi,16(%rsp) - .seh_endprologue - movq %rcx,%rdi - movq %rdx,%rsi - movq %r8,%rdx - - movq %rdx,%rcx - movq %rdi,%r8 - subq %rsi,%r8 - cmpq %rcx,%r8 /* overlapping? */ - jb 1f - cld /* nope, copy forwards. */ - shrq $3,%rcx /* copy by words */ - rep movsq - movq %rdx,%rcx - andq $7,%rcx /* any bytes left? */ - rep movsb - jmp 2f -1: - addq %rcx,%rdi /* copy backwards. */ - addq %rcx,%rsi - std - andq $7,%rcx /* any fractional bytes? 
*/ - decq %rdi - decq %rsi - rep movsb - movq %rdx,%rcx /* copy remainder by words */ - shrq $3,%rcx - subq $7,%rsi - subq $7,%rdi - rep movsq - cld -2: - movq 8(%rsp),%rsi - movq 16(%rsp),%rdi - ret - .seh_endproc - - .globl memmove - .seh_proc memmove -memmove: - .seh_endprologue - movq %rcx,%rax /* return dst */ - jmp _memcpy - .seh_endproc - - .globl memcpy - .seh_proc memcpy -memcpy: - .seh_endprologue - movq %rcx,%rax /* return dst */ - jmp _memcpy - .seh_endproc - - .globl mempcpy - .seh_proc mempcpy -mempcpy: - .seh_endprologue - movq %rcx,%rax /* return dst */ - addq %r8,%rax /* + n */ - jmp _memcpy - .seh_endproc - - .globl wmemmove - .seh_proc wmemmove -wmemmove: - .seh_endprologue - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ - movq %rcx,%rax /* return dst */ - jmp _memcpy - .seh_endproc - - .globl wmemcpy - .seh_proc wmemcpy -wmemcpy: - .seh_endprologue - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ - movq %rcx,%rax /* return dst */ - jmp _memcpy - .seh_endproc - - .globl wmempcpy - .seh_proc wmempcpy -wmempcpy: - .seh_endprologue - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ - movq %rcx,%rax /* return dst */ - addq %r8,%rax /* + n */ - jmp _memcpy - .seh_endproc +#define MEMCOPY +#include "bcopy.S" diff --git a/winsup/cygwin/x86_64/memmove.S b/winsup/cygwin/x86_64/memmove.S new file mode 100644 index 000000000000..f4b7b08257fa --- /dev/null +++ b/winsup/cygwin/x86_64/memmove.S @@ -0,0 +1,4 @@ +/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ + +#define MEMMOVE +#include "bcopy.S" diff --git a/winsup/cygwin/x86_64/mempcpy.S b/winsup/cygwin/x86_64/mempcpy.S new file mode 100644 index 000000000000..7ccb4f1d9c12 --- /dev/null +++ b/winsup/cygwin/x86_64/mempcpy.S @@ -0,0 +1,5 @@ +/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ + +#define MEMCOPY +#define POST +#include "bcopy.S" diff --git a/winsup/cygwin/x86_64/wmemcpy.S b/winsup/cygwin/x86_64/wmemcpy.S new file mode 100644 index 000000000000..c998ecac58b5 --- /dev/null +++ 
b/winsup/cygwin/x86_64/wmemcpy.S @@ -0,0 +1,5 @@ +/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ + +#define MEMCOPY +#define WIDE +#include "bcopy.S" diff --git a/winsup/cygwin/x86_64/wmemmove.S b/winsup/cygwin/x86_64/wmemmove.S new file mode 100644 index 000000000000..e7ee8efeb693 --- /dev/null +++ b/winsup/cygwin/x86_64/wmemmove.S @@ -0,0 +1,5 @@ +/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ + +#define MEMMOVE +#define WIDE +#include "bcopy.S" diff --git a/winsup/cygwin/x86_64/wmempcpy.S b/winsup/cygwin/x86_64/wmempcpy.S new file mode 100644 index 000000000000..53f4ceb2e89e --- /dev/null +++ b/winsup/cygwin/x86_64/wmempcpy.S @@ -0,0 +1,6 @@ +/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */ + +#define MEMCOPY +#define WIDE +#define POST +#include "bcopy.S"