From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id C21F03857363; Mon, 8 Aug 2022 16:16:55 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org C21F03857363 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin] Cygwin: split out x86_64 memset/memcpy functions X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/master X-Git-Oldrev: 4d6c88e03095e02537d99e90c4babd1a534a258b X-Git-Newrev: 3e13d935545f31cbd1f4871b84f5d70928c56f42 Message-Id: <20220808161655.C21F03857363@sourceware.org> Date: Mon, 8 Aug 2022 16:16:55 +0000 (GMT) X-BeenThere: cygwin-cvs@cygwin.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Cygwin core component git logs List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Aug 2022 16:16:55 -0000 https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D3e13d935545= f31cbd1f4871b84f5d70928c56f42 commit 3e13d935545f31cbd1f4871b84f5d70928c56f42 Author: Corinna Vinschen Date: Mon Aug 8 16:25:41 2022 +0200 Cygwin: split out x86_64 memset/memcpy functions =20 move the assembler memset and memcpy functions into their own assembler files. 
=20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/Makefile.am | 7 ++ winsup/cygwin/miscfuncs.cc | 207 --------------------------------------= ---- winsup/cygwin/x86_64/memcpy.s | 131 ++++++++++++++++++++++++++ winsup/cygwin/x86_64/memset.s | 69 ++++++++++++++ 4 files changed, 207 insertions(+), 207 deletions(-) diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am index c7d7a7716..6838be99a 100644 --- a/winsup/cygwin/Makefile.am +++ b/winsup/cygwin/Makefile.am @@ -53,6 +53,12 @@ TEST_LIB_NAME=3Dlibcygwin0.a # =20 # These objects are included directly into the import library +if TARGET_X86_64 +TARGET_FILES=3D \ + x86_64/memcpy.s \ + x86_64/memset.s +endif + LIB_FILES=3D \ lib/_cygwin_crt0_common.cc \ lib/atexit.c \ @@ -361,6 +367,7 @@ liblib_a_SOURCES=3D \ $(LIB_FILES) =20 libdll_a_SOURCES=3D \ + $(TARGET_FILES) \ $(DLL_FILES) \ $(REGEX_FILES) \ $(MALLOC_FILES) \ diff --git a/winsup/cygwin/miscfuncs.cc b/winsup/cygwin/miscfuncs.cc index bc362e331..1513bc04e 100644 --- a/winsup/cygwin/miscfuncs.cc +++ b/winsup/cygwin/miscfuncs.cc @@ -705,213 +705,6 @@ err: return thread; } =20 -#ifdef __x86_64__ -/* These functions are almost verbatim FreeBSD code (even if the header of - one file mentiones NetBSD), just wrapped in the minimum required code to - make them work with the MS AMD64 ABI. - See FreeBSD src/lib/libc/amd64/string/memset.S - and FreeBSD src/lib/libc/amd64/string/bcopy.S */ - -asm (" \n\ -/* \n\ - * Written by J.T. Conklin . \n\ - * Public domain. 
\n\ - * Adapted for NetBSD/x86_64 by \n\ - * Frank van der Linden \n\ - */ \n\ - \n\ - .globl memset \n\ - .seh_proc memset \n\ -memset: \n\ - movq %rsi,8(%rsp) \n\ - movq %rdi,16(%rsp) \n\ - .seh_endprologue \n\ - movq %rcx,%rdi \n\ - movq %rdx,%rsi \n\ - movq %r8,%rdx \n\ - \n\ - movq %rsi,%rax \n\ - andq $0xff,%rax \n\ - movq %rdx,%rcx \n\ - movq %rdi,%r11 \n\ - \n\ - cld /* set fill direction forward */ \n\ - \n\ - /* if the string is too short, it's really not worth the \n\ - * overhead of aligning to word boundries, etc. So we jump to \n\ - * a plain unaligned set. */ \n\ - cmpq $0x0f,%rcx \n\ - jle L1 \n\ - \n\ - movb %al,%ah /* copy char to all bytes in word */\n\ - movl %eax,%edx \n\ - sall $16,%eax \n\ - orl %edx,%eax \n\ - \n\ - movl %eax,%edx \n\ - salq $32,%rax \n\ - orq %rdx,%rax \n\ - \n\ - movq %rdi,%rdx /* compute misalignment */ \n\ - negq %rdx \n\ - andq $7,%rdx \n\ - movq %rcx,%r8 \n\ - subq %rdx,%r8 \n\ - \n\ - movq %rdx,%rcx /* set until word aligned */ \n\ - rep \n\ - stosb \n\ - \n\ - movq %r8,%rcx \n\ - shrq $3,%rcx /* set by words */ \n\ - rep \n\ - stosq \n\ - \n\ - movq %r8,%rcx /* set remainder by bytes */ \n\ - andq $7,%rcx \n\ -L1: rep \n\ - stosb \n\ - movq %r11,%rax \n\ - \n\ - movq 8(%rsp),%rsi \n\ - movq 16(%rsp),%rdi \n\ - ret \n\ - .seh_endproc \n\ -"); - -asm (" \n\ -/*- \n\ - * Copyright (c) 1990 The Regents of the University of California. \n\ - * All rights reserved. \n\ - * \n\ - * This code is derived from locore.s. \n\ - * \n\ - * Redistribution and use in source and binary forms, with or without \n\ - * modification, are permitted provided that the following conditions \n\ - * are met: \n\ - * 1. Redistributions of source code must retain the above copyright \n\ - * notice, this list of conditions and the following disclaimer. \n\ - * 2. 
Redistributions in binary form must reproduce the above copyright \n\ - * notice, this list of conditions and the following disclaimer in \n\ - * the documentation and/or other materials provided with the \n\ - * distribution. \n\ - * 3. Neither the name of the University nor the names of its \n\ - * contributors may be used to endorse or promote products derived \n\ - * from this software without specific prior written permission. \n\ - * \n\ - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' \n\ - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,\n\ - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A \n\ - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR \n\ - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n\ - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, \n\ - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR \n\ - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY \n\ - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT \n\ - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE \n\ - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH \n\ - * DAMAGE. \n\ - */ \n\ - \n\ - .seh_proc _memcpy \n\ -_memcpy: \n\ - movq %rsi,8(%rsp) \n\ - movq %rdi,16(%rsp) \n\ - .seh_endprologue \n\ - movq %rcx,%rdi \n\ - movq %rdx,%rsi \n\ - movq %r8,%rdx \n\ - \n\ - movq %rdx,%rcx \n\ - movq %rdi,%r8 \n\ - subq %rsi,%r8 \n\ - cmpq %rcx,%r8 /* overlapping? */ \n\ - jb 1f \n\ - cld /* nope, copy forwards. */ \n\ - shrq $3,%rcx /* copy by words */ \n\ - rep movsq \n\ - movq %rdx,%rcx \n\ - andq $7,%rcx /* any bytes left? */ \n\ - rep movsb \n\ - jmp 2f \n\ -1: \n\ - addq %rcx,%rdi /* copy backwards. */ \n\ - addq %rcx,%rsi \n\ - std \n\ - andq $7,%rcx /* any fractional bytes? 
*/ \n\ - decq %rdi \n\ - decq %rsi \n\ - rep movsb \n\ - movq %rdx,%rcx /* copy remainder by words */ \n\ - shrq $3,%rcx \n\ - subq $7,%rsi \n\ - subq $7,%rdi \n\ - rep movsq \n\ - cld \n\ -2: \n\ - movq 8(%rsp),%rsi \n\ - movq 16(%rsp),%rdi \n\ - ret \n\ - .seh_endproc \n\ - \n\ - .globl memmove \n\ - .seh_proc memmove \n\ -memmove: \n\ - .seh_endprologue \n\ - movq %rcx,%rax /* return dst */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ - \n\ - .globl memcpy \n\ - .seh_proc memcpy \n\ -memcpy: \n\ - .seh_endprologue \n\ - movq %rcx,%rax /* return dst */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ - \n\ - .globl mempcpy \n\ - .seh_proc mempcpy \n\ -mempcpy: \n\ - .seh_endprologue \n\ - movq %rcx,%rax /* return dst */ \n\ - addq %r8,%rax /* + n */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ - \n\ - .globl wmemmove \n\ - .seh_proc wmemmove \n\ -wmemmove: \n\ - .seh_endprologue \n\ - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ \n\ - movq %rcx,%rax /* return dst */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ - \n\ - .globl wmemcpy \n\ - .seh_proc wmemcpy \n\ -wmemcpy: \n\ - .seh_endprologue \n\ - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ \n\ - movq %rcx,%rax /* return dst */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ - \n\ - .globl wmempcpy \n\ - .seh_proc wmempcpy \n\ -wmempcpy: \n\ - .seh_endprologue \n\ - shlq $1,%r8 /* cnt * sizeof (wchar_t) */ \n\ - movq %rcx,%rax /* return dst */ \n\ - addq %r8,%rax /* + n */ \n\ - jmp _memcpy \n\ - .seh_endproc \n\ -"); - -#else -#error unimplemented for this target -#endif - /* Signal the thread name to any attached debugger =20 (See "How to: Set a Thread Name in Native Code" diff --git a/winsup/cygwin/x86_64/memcpy.s b/winsup/cygwin/x86_64/memcpy.s new file mode 100644 index 000000000..4be7a0145 --- /dev/null +++ b/winsup/cygwin/x86_64/memcpy.s @@ -0,0 +1,131 @@ +/* These functions are almost verbatim FreeBSD code (even if the header of + one file mentiones NetBSD), just wrapped in the minimum required code to + make them work under the MS AMD64 
ABI. + See FreeBSD src/lib/libc/amd64/string/bcopy.S */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from locore.s. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + + .seh_proc _memcpy +_memcpy: + movq %rsi,8(%rsp) /* save %rsi; reloaded at 2: below */ + movq %rdi,16(%rsp) /* save %rdi; reloaded at 2: below */ + .seh_endprologue + movq %rcx,%rdi /* dst (entry points pass it in %rcx) */ + movq %rdx,%rsi /* source for movs; presumably the 2nd argument */ + movq %r8,%rdx /* byte count (entry points pass it in %r8) */ + + movq %rdx,%rcx + movq %rdi,%r8 + subq %rsi,%r8 /* %r8 = dst - src */ + cmpq %rcx,%r8 /* overlapping? */ + jb 1f /* unsigned (dst - src) < count: copy backwards */ + cld /* nope, copy forwards.
*/ + shrq $3,%rcx /* copy by words */ + rep movsq + movq %rdx,%rcx + andq $7,%rcx /* any bytes left? */ + rep movsb + jmp 2f +1: + addq %rcx,%rdi /* copy backwards. */ + addq %rcx,%rsi + std /* set direction flag for the backward copy */ + andq $7,%rcx /* any fractional bytes? */ + decq %rdi + decq %rsi + rep movsb + movq %rdx,%rcx /* copy remainder by words */ + shrq $3,%rcx + subq $7,%rsi + subq $7,%rdi + rep movsq + cld /* clear the direction flag again before returning */ +2: + movq 8(%rsp),%rsi /* reload the %rsi saved on entry */ + movq 16(%rsp),%rdi /* reload the %rdi saved on entry */ + ret + .seh_endproc + + .globl memmove + .seh_proc memmove +memmove: + .seh_endprologue + movq %rcx,%rax /* return dst */ + jmp _memcpy /* _memcpy never writes %rax, so the value set above is returned */ + .seh_endproc + + .globl memcpy + .seh_proc memcpy +memcpy: + .seh_endprologue + movq %rcx,%rax /* return dst */ + jmp _memcpy + .seh_endproc + + .globl mempcpy + .seh_proc mempcpy +mempcpy: + .seh_endprologue + movq %rcx,%rax /* return dst */ + addq %r8,%rax /* + n */ + jmp _memcpy + .seh_endproc + + .globl wmemmove + .seh_proc wmemmove +wmemmove: + .seh_endprologue + shlq $1,%r8 /* cnt * sizeof (wchar_t) */ + movq %rcx,%rax /* return dst */ + jmp _memcpy + .seh_endproc + + .globl wmemcpy + .seh_proc wmemcpy +wmemcpy: + .seh_endprologue + shlq $1,%r8 /* cnt * sizeof (wchar_t) */ + movq %rcx,%rax /* return dst */ + jmp _memcpy + .seh_endproc + + .globl wmempcpy + .seh_proc wmempcpy +wmempcpy: + .seh_endprologue + shlq $1,%r8 /* cnt * sizeof (wchar_t) */ + movq %rcx,%rax /* return dst */ + addq %r8,%rax /* + n */ + jmp _memcpy + .seh_endproc diff --git a/winsup/cygwin/x86_64/memset.s b/winsup/cygwin/x86_64/memset.s new file mode 100644 index 000000000..ac73b6ace --- /dev/null +++ b/winsup/cygwin/x86_64/memset.s @@ -0,0 +1,69 @@ +/* These functions are almost verbatim FreeBSD code (even if the header of + one file mentions NetBSD), just wrapped in the minimum required code to + make them work under the MS AMD64 ABI. + See FreeBSD src/lib/libc/amd64/string/memset.S */ + +/* + * Written by J.T. Conklin . + * Public domain.
+ * Adapted for NetBSD/x86_64 by + * Frank van der Linden + */ + + .globl memset + .seh_proc memset +memset: + movq %rsi,8(%rsp) /* save %rsi; reloaded before ret */ + movq %rdi,16(%rsp) /* save %rdi; reloaded before ret */ + .seh_endprologue + movq %rcx,%rdi /* destination pointer used by stos */ + movq %rdx,%rsi /* fill value */ + movq %r8,%rdx /* byte count */ + + movq %rsi,%rax + andq $0xff,%rax /* keep only the low byte of the fill value */ + movq %rdx,%rcx + movq %rdi,%r11 /* remember dst for the return value */ + + cld /* set fill direction forward */ + + /* if the string is too short, it's really not worth the + * overhead of aligning to word boundaries, etc. So we jump to + * a plain unaligned set. */ + cmpq $0x0f,%rcx + jle L1 /* signed compare: count <= 15 goes straight to the byte fill */ + + movb %al,%ah /* copy char to all bytes in word */ + movl %eax,%edx + sall $16,%eax + orl %edx,%eax + + movl %eax,%edx + salq $32,%rax + orq %rdx,%rax + + movq %rdi,%rdx /* compute misalignment */ + negq %rdx + andq $7,%rdx + movq %rcx,%r8 + subq %rdx,%r8 /* %r8 = count left after the alignment bytes */ + + movq %rdx,%rcx /* set until word aligned */ + rep + stosb + + movq %r8,%rcx + shrq $3,%rcx /* set by words */ + rep + stosq + + movq %r8,%rcx /* set remainder by bytes */ + andq $7,%rcx +L1: rep + stosb + movq %r11,%rax /* return the original dst */ + + movq 8(%rsp),%rsi + movq 16(%rsp),%rdi + ret + .seh_endproc