From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi1-x22f.google.com (mail-oi1-x22f.google.com [IPv6:2607:f8b0:4864:20::22f]) by sourceware.org (Postfix) with ESMTPS id CCA73386C593 for ; Wed, 29 Jun 2022 23:31:10 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org CCA73386C593 Received: by mail-oi1-x22f.google.com with SMTP id w193so23757925oie.5 for ; Wed, 29 Jun 2022 16:31:10 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=C5rVD8UjnpaNJn8EPHEzvPJ0gF8vHPiHuBwghXoz2M0=; b=nqWrmS2yzquEMMyyYzqXGETZ4bwwagnnchbAVYl2+5eghJe4Nmsv/P8mSKDXbnW/bM 38j9/+bt2ZkegvvMpoFuKUKuwxK2fCEkYoguXtnHhWnu2/wWVJvTqsG6qynPZuXQm2LT 4Hus486wST0wH3j92ZjlaOYaY+uQRACbmewW3lOZWfGyFqISw+E+WQsCpXpMMTRajc7l +f5Q6EC38PH40fEQVfhNOLERkAzJCG2G2ARzhFcpW4u7CR0JSOzB+c/8ajEkKP9KaYQF uVJSuf5ofwS+g8m88abVpakGknSxA/4SM0B19fJ7WKsc+ZrCIUbFxSBb+CGMRbX3zKdO iuZg== X-Gm-Message-State: AJIora+TVJQLFlRec/kx6LhSQ1J8HSLbGVYQGt+fhPtVQwzFQHL7lCkj SPnztxobG5L/HsOKjecCAHkBAVqd3j13KE6bFn8= X-Google-Smtp-Source: AGRyM1uZklR5G/hbrU4TTL8kK3tbLplyilms/sCheP6Y9tOWc0DL9o/8Z5U9ffkeu5Dfx97XkKWby+had5xo97KIG1o= X-Received: by 2002:a05:6808:14c4:b0:335:242a:f898 with SMTP id f4-20020a05680814c400b00335242af898mr3511234oiw.201.1656545470122; Wed, 29 Jun 2022 16:31:10 -0700 (PDT) MIME-Version: 1.0 References: <20220628152757.17922-1-goldstein.w.n@gmail.com> <20220629230716.1264249-1-goldstein.w.n@gmail.com> In-Reply-To: <20220629230716.1264249-1-goldstein.w.n@gmail.com> From: "H.J. 
Lu" Date: Wed, 29 Jun 2022 16:30:34 -0700 Message-ID: Subject: Re: [PATCH v3 1/2] x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file To: Noah Goldstein Cc: GNU C Library , "Carlos O'Donell" Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-3024.7 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 29 Jun 2022 23:31:12 -0000 On Wed, Jun 29, 2022 at 4:07 PM Noah Goldstein wrote: > > The primary memmove_{impl}_unaligned_erms implementations don't > interact with this function. Putting them in same file both > wastes space and unnecessarily bloats a hot code section. 
> --- > sysdeps/x86_64/multiarch/Makefile | 1 + > sysdeps/x86_64/multiarch/memmove-erms.S | 72 +++++++++++++++++++ > .../multiarch/memmove-vec-unaligned-erms.S | 50 ------------- > 3 files changed, 73 insertions(+), 50 deletions(-) > create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 666ee4d5d6..62a4d96fb8 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -18,6 +18,7 @@ sysdep_routines += \ > memmove-avx-unaligned-erms-rtm \ > memmove-avx512-no-vzeroupper \ > memmove-avx512-unaligned-erms \ > + memmove-erms \ > memmove-evex-unaligned-erms \ > memmove-sse2-unaligned-erms \ > memmove-ssse3 \ > diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S > new file mode 100644 > index 0000000000..2d3a6ccb76 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/memmove-erms.S > @@ -0,0 +1,72 @@ > +/* memcpy/mempcpy/memmove implement with rep movsb > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > + > +#include <sysdep.h> > + > +#if defined USE_MULTIARCH && IS_IN (libc) > + .text > +ENTRY (__mempcpy_chk_erms) > + cmp %RDX_LP, %RCX_LP > + jb HIDDEN_JUMPTARGET (__chk_fail) > +END (__mempcpy_chk_erms) > + > +/* Only used to measure performance of REP MOVSB. */ > +ENTRY (__mempcpy_erms) > + mov %RDI_LP, %RAX_LP > + /* Skip zero length. */ > + test %RDX_LP, %RDX_LP > + jz 2f > + add %RDX_LP, %RAX_LP > + jmp L(start_movsb) > +END (__mempcpy_erms) > + > +ENTRY (__memmove_chk_erms) > + cmp %RDX_LP, %RCX_LP > + jb HIDDEN_JUMPTARGET (__chk_fail) > +END (__memmove_chk_erms) > + > +ENTRY (__memmove_erms) > + movq %rdi, %rax > + /* Skip zero length. */ > + test %RDX_LP, %RDX_LP > + jz 2f > +L(start_movsb): > + mov %RDX_LP, %RCX_LP > + cmp %RSI_LP, %RDI_LP > + jb 1f > + /* Source == destination is less common. */ > + je 2f > + lea (%rsi,%rcx), %RDX_LP > + cmp %RDX_LP, %RDI_LP > + jb L(movsb_backward) > +1: > + rep movsb > +2: > + ret > +L(movsb_backward): > + leaq -1(%rdi,%rcx), %rdi > + leaq -1(%rsi,%rcx), %rsi > + std > + rep movsb > + cld > + ret > +END (__memmove_erms) > +strong_alias (__memmove_erms, __memcpy_erms) > +strong_alias (__memmove_chk_erms, __memcpy_chk_erms) > +#endif > diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S > index d1518b8bab..04747133b7 100644 > --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S > +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S > @@ -239,56 +239,6 @@ L(start): > #endif > #if defined USE_MULTIARCH && IS_IN (libc) > END (MEMMOVE_SYMBOL (__memmove, unaligned)) > -# if VEC_SIZE == 16 > -ENTRY (__mempcpy_chk_erms) > - cmp %RDX_LP, %RCX_LP > - jb HIDDEN_JUMPTARGET (__chk_fail) > -END (__mempcpy_chk_erms) > - > -/* Only used to measure performance of REP MOVSB. */ > -ENTRY (__mempcpy_erms) > - mov %RDI_LP, %RAX_LP > - /* Skip zero length. 
*/ > - test %RDX_LP, %RDX_LP > - jz 2f > - add %RDX_LP, %RAX_LP > - jmp L(start_movsb) > -END (__mempcpy_erms) > - > -ENTRY (__memmove_chk_erms) > - cmp %RDX_LP, %RCX_LP > - jb HIDDEN_JUMPTARGET (__chk_fail) > -END (__memmove_chk_erms) > - > -ENTRY (__memmove_erms) > - movq %rdi, %rax > - /* Skip zero length. */ > - test %RDX_LP, %RDX_LP > - jz 2f > -L(start_movsb): > - mov %RDX_LP, %RCX_LP > - cmp %RSI_LP, %RDI_LP > - jb 1f > - /* Source == destination is less common. */ > - je 2f > - lea (%rsi,%rcx), %RDX_LP > - cmp %RDX_LP, %RDI_LP > - jb L(movsb_backward) > -1: > - rep movsb > -2: > - ret > -L(movsb_backward): > - leaq -1(%rdi,%rcx), %rdi > - leaq -1(%rsi,%rcx), %rsi > - std > - rep movsb > - cld > - ret > -END (__memmove_erms) > -strong_alias (__memmove_erms, __memcpy_erms) > -strong_alias (__memmove_chk_erms, __memcpy_chk_erms) > -# endif > > # ifdef SHARED > ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms)) > -- > 2.34.1 > LGTM. Thanks. -- H.J.