From: Joel Sherrill <joel@rtems.org>
To: Sebastian Huber <sebastian.huber@embedded-brains.de>
Cc: newlib@sourceware.org
Subject: Re: [PATCH] aarch64: Remove duplicated optimized memmove()
Date: Fri, 27 Oct 2023 12:15:25 -0500 [thread overview]
Message-ID: <CAF9ehCWBpURSiFDd8tgVdsfeHKxNXZ+9GFyaVvDuPZxUctWgWw@mail.gmail.com> (raw)
In-Reply-To: <20231027165918.69721-1-sebastian.huber@embedded-brains.de>
[-- Attachment #1: Type: text/plain, Size: 15732 bytes --]
This doesn't appear to resolve the issue.
$ aarch64-rtems6-nm -g ~/test-gcc/install-master/aarch64-rtems6/lib/libc.a | grep "T memmov"
0000000000000000 T memmove
0000000000000000 T memmove
libc/string/memmove.c is still compiled because nothing in machine/aarch64
overrides it. As a result, libc.a ends up containing two objects that define
memmove(): the one built from libc/string and the one built from memcpy.S.
I think there has to be an empty file for memmove.[cS] in machine/aarch64
to override the libc/string version.
Alternatively, I think memmove-stub.c could simply be renamed to memmove.c,
which should give the same result.
--joel
On Fri, Oct 27, 2023 at 11:59 AM Sebastian Huber <
sebastian.huber@embedded-brains.de> wrote:
> The optimized aarch64/memcpy.S already provides a memmove() implementation.
> ---
> newlib/Makefile.in | 20 ---
> newlib/libc/machine/aarch64/Makefile.inc | 1 -
> newlib/libc/machine/aarch64/memmove-stub.c | 2 +-
> newlib/libc/machine/aarch64/memmove.S | 155 ---------------------
> 4 files changed, 1 insertion(+), 177 deletions(-)
> delete mode 100644 newlib/libc/machine/aarch64/memmove.S
>
> diff --git a/newlib/Makefile.in b/newlib/Makefile.in
> index 4cb3534cc4..9eb21c7919 100644
> --- a/newlib/Makefile.in
> +++ b/newlib/Makefile.in
> @@ -595,7 +595,6 @@ check_PROGRAMS =
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/memcpy-stub.c \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@ libc/machine/aarch64/memcpy.S \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/memmove-stub.c \
> -@HAVE_LIBC_MACHINE_AARCH64_TRUE@ libc/machine/aarch64/memmove.S \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/memrchr-stub.c \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@ libc/machine/aarch64/memrchr.S \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/memset-stub.c \
> @@ -1848,7 +1847,6 @@ am__objects_51 = libc/ssp/libc_a-chk_fail.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memcpy-stub.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memcpy.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memmove-stub.$(OBJEXT) \
> -@HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memmove.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memrchr-stub.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memrchr.$(OBJEXT) \
> @HAVE_LIBC_MACHINE_AARCH64_TRUE@
> libc/machine/aarch64/libc_a-memset-stub.$(OBJEXT) \
> @@ -8025,9 +8023,6 @@ libc/machine/aarch64/libc_a-memcpy.$(OBJEXT): \
> libc/machine/aarch64/libc_a-memmove-stub.$(OBJEXT): \
> libc/machine/aarch64/$(am__dirstamp) \
> libc/machine/aarch64/$(DEPDIR)/$(am__dirstamp)
> -libc/machine/aarch64/libc_a-memmove.$(OBJEXT): \
> - libc/machine/aarch64/$(am__dirstamp) \
> - libc/machine/aarch64/$(DEPDIR)/$(am__dirstamp)
> libc/machine/aarch64/libc_a-memrchr-stub.$(OBJEXT): \
> libc/machine/aarch64/$(am__dirstamp) \
> libc/machine/aarch64/$(DEPDIR)/$(am__dirstamp)
> @@ -12739,7 +12734,6 @@ distclean-compile:
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memcpy-stub.Po@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memcpy.Po@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memmove-stub.Po@am__quote@
> -@AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memmove.Po@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memrchr-stub.Po@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memrchr.Po@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@libc
> /machine/aarch64/$(DEPDIR)/libc_a-memset-stub.Po@am__quote@
> @@ -16709,20 +16703,6 @@ libc/machine/aarch64/libc_a-memcpy.obj:
> libc/machine/aarch64/memcpy.S
> @AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE)
> $(depcomp) @AMDEPBACKSLASH@
> @am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS)
> $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS)
> $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o
> libc/machine/aarch64/libc_a-memcpy.obj `if test -f
> 'libc/machine/aarch64/memcpy.S'; then $(CYGPATH_W)
> 'libc/machine/aarch64/memcpy.S'; else $(CYGPATH_W)
> '$(srcdir)/libc/machine/aarch64/memcpy.S'; fi`
>
> -libc/machine/aarch64/libc_a-memmove.o: libc/machine/aarch64/memmove.S
> -@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES)
> $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS)
> -MT libc/machine/aarch64/libc_a-memmove.o -MD -MP -MF
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Tpo -c -o
> libc/machine/aarch64/libc_a-memmove.o `test -f
> 'libc/machine/aarch64/memmove.S' || echo
> '$(srcdir)/'`libc/machine/aarch64/memmove.S
> -@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv)
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Tpo
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Po
> -@AMDEP_TRUE@@am__fastdepCCAS_FALSE@
> $(AM_V_CPPAS)source='libc/machine/aarch64/memmove.S'
> object='libc/machine/aarch64/libc_a-memmove.o' libtool=no @AMDEPBACKSLASH@
> -@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE)
> $(depcomp) @AMDEPBACKSLASH@
> -@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS)
> $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS)
> $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o
> libc/machine/aarch64/libc_a-memmove.o `test -f
> 'libc/machine/aarch64/memmove.S' || echo
> '$(srcdir)/'`libc/machine/aarch64/memmove.S
> -
> -libc/machine/aarch64/libc_a-memmove.obj: libc/machine/aarch64/memmove.S
> -@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES)
> $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS)
> -MT libc/machine/aarch64/libc_a-memmove.obj -MD -MP -MF
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Tpo -c -o
> libc/machine/aarch64/libc_a-memmove.obj `if test -f
> 'libc/machine/aarch64/memmove.S'; then $(CYGPATH_W)
> 'libc/machine/aarch64/memmove.S'; else $(CYGPATH_W)
> '$(srcdir)/libc/machine/aarch64/memmove.S'; fi`
> -@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv)
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Tpo
> libc/machine/aarch64/$(DEPDIR)/libc_a-memmove.Po
> -@AMDEP_TRUE@@am__fastdepCCAS_FALSE@
> $(AM_V_CPPAS)source='libc/machine/aarch64/memmove.S'
> object='libc/machine/aarch64/libc_a-memmove.obj' libtool=no @AMDEPBACKSLASH@
> -@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE)
> $(depcomp) @AMDEPBACKSLASH@
> -@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS)
> $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS)
> $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o
> libc/machine/aarch64/libc_a-memmove.obj `if test -f
> 'libc/machine/aarch64/memmove.S'; then $(CYGPATH_W)
> 'libc/machine/aarch64/memmove.S'; else $(CYGPATH_W)
> '$(srcdir)/libc/machine/aarch64/memmove.S'; fi`
> -
> libc/machine/aarch64/libc_a-memrchr.o: libc/machine/aarch64/memrchr.S
> @am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES)
> $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS)
> -MT libc/machine/aarch64/libc_a-memrchr.o -MD -MP -MF
> libc/machine/aarch64/$(DEPDIR)/libc_a-memrchr.Tpo -c -o
> libc/machine/aarch64/libc_a-memrchr.o `test -f
> 'libc/machine/aarch64/memrchr.S' || echo
> '$(srcdir)/'`libc/machine/aarch64/memrchr.S
> @am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv)
> libc/machine/aarch64/$(DEPDIR)/libc_a-memrchr.Tpo
> libc/machine/aarch64/$(DEPDIR)/libc_a-memrchr.Po
> diff --git a/newlib/libc/machine/aarch64/Makefile.inc
> b/newlib/libc/machine/aarch64/Makefile.inc
> index c749b0d575..f705dfea15 100644
> --- a/newlib/libc/machine/aarch64/Makefile.inc
> +++ b/newlib/libc/machine/aarch64/Makefile.inc
> @@ -6,7 +6,6 @@ libc_a_SOURCES += \
> %D%/memcpy-stub.c \
> %D%/memcpy.S \
> %D%/memmove-stub.c \
> - %D%/memmove.S \
> %D%/memrchr-stub.c \
> %D%/memrchr.S \
> %D%/memset-stub.c \
> diff --git a/newlib/libc/machine/aarch64/memmove-stub.c
> b/newlib/libc/machine/aarch64/memmove-stub.c
> index 8fa4ab9387..bc8255fb8b 100644
> --- a/newlib/libc/machine/aarch64/memmove-stub.c
> +++ b/newlib/libc/machine/aarch64/memmove-stub.c
> @@ -27,5 +27,5 @@
> #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
> # include "../../string/memmove.c"
> #else
> -/* See memmove.S */
> +/* See memcpy.S */
> #endif
> diff --git a/newlib/libc/machine/aarch64/memmove.S
> b/newlib/libc/machine/aarch64/memmove.S
> deleted file mode 100644
> index 597a8c8e9e..0000000000
> --- a/newlib/libc/machine/aarch64/memmove.S
> +++ /dev/null
> @@ -1,155 +0,0 @@
> -/* Copyright (c) 2013, Linaro Limited
> - All rights reserved.
> -
> - Redistribution and use in source and binary forms, with or without
> - modification, are permitted provided that the following conditions are
> met:
> - * Redistributions of source code must retain the above copyright
> - notice, this list of conditions and the following disclaimer.
> - * Redistributions in binary form must reproduce the above copyright
> - notice, this list of conditions and the following disclaimer in
> the
> - documentation and/or other materials provided with the
> distribution.
> - * Neither the name of the Linaro nor the
> - names of its contributors may be used to endorse or promote
> products
> - derived from this software without specific prior written
> permission.
> -
> - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
> -
> -/*
> - * Copyright (c) 2015 ARM Ltd
> - * All rights reserved.
> - *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - * 1. Redistributions of source code must retain the above copyright
> - * notice, this list of conditions and the following disclaimer.
> - * 2. Redistributions in binary form must reproduce the above copyright
> - * notice, this list of conditions and the following disclaimer in the
> - * documentation and/or other materials provided with the distribution.
> - * 3. The name of the company may not be used to endorse or promote
> - * products derived from this software without specific prior written
> - * permission.
> - *
> - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
> IMPLIED
> - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
> - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> LIMITED
> - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - */
> -
> -/* Assumptions:
> - *
> - * ARMv8-a, AArch64, unaligned accesses
> - */
> -
> -#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
> -/* See memmove-stub.c */
> -#else
> -
> - .macro def_fn f p2align=0
> - .text
> - .p2align \p2align
> - .global \f
> - .type \f, %function
> -\f:
> - .endm
> -
> -/* Parameters and result. */
> -#define dstin x0
> -#define src x1
> -#define count x2
> -#define srcend x3
> -#define dstend x4
> -#define tmp1 x5
> -#define A_l x6
> -#define A_h x7
> -#define B_l x8
> -#define B_h x9
> -#define C_l x10
> -#define C_h x11
> -#define D_l x12
> -#define D_h x13
> -#define E_l count
> -#define E_h tmp1
> -
> -/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
> - Larger backwards copies are also handled by memcpy. The only remaining
> - case is forward large copies. The destination is aligned, and an
> - unrolled loop processes 64 bytes per iteration.
> -*/
> -
> -def_fn memmove, 6
> - sub tmp1, dstin, src
> - cmp count, 96
> - ccmp tmp1, count, 2, hi
> - b.hs memcpy
> -
> - cbz tmp1, 3f
> - add dstend, dstin, count
> - add srcend, src, count
> -
> - /* Align dstend to 16 byte alignment so that we don't cross cache
> line
> - boundaries on both loads and stores. There are at least 96
> bytes
> - to copy, so copy 16 bytes unaligned and then align. The loop
> - copies 64 bytes per iteration and prefetches one iteration
> ahead. */
> -
> - and tmp1, dstend, 15
> - ldp D_l, D_h, [srcend, -16]
> - sub srcend, srcend, tmp1
> - sub count, count, tmp1
> - ldp A_l, A_h, [srcend, -16]
> - stp D_l, D_h, [dstend, -16]
> - ldp B_l, B_h, [srcend, -32]
> - ldp C_l, C_h, [srcend, -48]
> - ldp D_l, D_h, [srcend, -64]!
> - sub dstend, dstend, tmp1
> - subs count, count, 128
> - b.ls 2f
> - nop
> -1:
> - stp A_l, A_h, [dstend, -16]
> - ldp A_l, A_h, [srcend, -16]
> - stp B_l, B_h, [dstend, -32]
> - ldp B_l, B_h, [srcend, -32]
> - stp C_l, C_h, [dstend, -48]
> - ldp C_l, C_h, [srcend, -48]
> - stp D_l, D_h, [dstend, -64]!
> - ldp D_l, D_h, [srcend, -64]!
> - subs count, count, 64
> - b.hi 1b
> -
> - /* Write the last full set of 64 bytes. The remainder is at most
> 64
> - bytes, so it is safe to always copy 64 bytes from the start
> even if
> - there is just 1 byte left. */
> -2:
> - ldp E_l, E_h, [src, 48]
> - stp A_l, A_h, [dstend, -16]
> - ldp A_l, A_h, [src, 32]
> - stp B_l, B_h, [dstend, -32]
> - ldp B_l, B_h, [src, 16]
> - stp C_l, C_h, [dstend, -48]
> - ldp C_l, C_h, [src]
> - stp D_l, D_h, [dstend, -64]
> - stp E_l, E_h, [dstin, 48]
> - stp A_l, A_h, [dstin, 32]
> - stp B_l, B_h, [dstin, 16]
> - stp C_l, C_h, [dstin]
> -3: ret
> -
> - .size memmove, . - memmove
> -#endif
> --
> 2.35.3
>
>
next prev parent reply other threads:[~2023-10-27 17:15 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-27 16:59 Sebastian Huber
2023-10-27 17:15 ` Joel Sherrill [this message]
2023-10-27 18:19 ` Sebastian Huber
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAF9ehCWBpURSiFDd8tgVdsfeHKxNXZ+9GFyaVvDuPZxUctWgWw@mail.gmail.com \
--to=joel@rtems.org \
--cc=newlib@sourceware.org \
--cc=sebastian.huber@embedded-brains.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).