From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pf1-x434.google.com (mail-pf1-x434.google.com [IPv6:2607:f8b0:4864:20::434]) by sourceware.org (Postfix) with ESMTPS id 8E0FC3847814 for ; Thu, 30 Jun 2022 02:03:22 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 8E0FC3847814 Received: by mail-pf1-x434.google.com with SMTP id p14so16759789pfh.6 for ; Wed, 29 Jun 2022 19:03:22 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=FhHvSODjPH5htmO6gW7SeSz1QME5uL5wKOtoMIDDkrE=; b=BHBUDnfCh3elC+hoT7vzvdN7nxhC7evkXinCt1tjdFUCaH5kkxqB2IVRu0y9DR1nqL qW+W6ij8O48LgrNxA3fE8Hi28rk6UDPQSyuGE7QDqV/HBf9LJ8EqAwMQGAUjIIH+5S+r 4kpTYdzeJCFnR2uz230qZ/Wn/X9gVNutN793mVY/+UgXxCMmybVr9cAE6xFytraNt8kl VpAWd4OsOR/ziamvsIHwcqxeCamJ39lLSCf7inzowwvw1ppG3FtxJI3t1QOJbPbLAXBq X4s2TPw5EEB/9HX0Y7AkXDobHWBGde+oY6Phnwm1fG5hyqakaoYZmmvpgwTmt4ZxNIZE TXOA== X-Gm-Message-State: AJIora8/YGs3Hnga6h+NXXfDoYspVwDi6lYW7R6LBw7d24jSXUfiJ3NW /r82iskj9vZbcOhMWjEe0F77klszRswIom2mTZI= X-Google-Smtp-Source: AGRyM1t9qnEdgdTH/1y4p5+bbVdTdlORWDFHoKJyTKASdhw+Rzx2BqhG6mu3aPmJ0SNsmb7b6qYmruGshLDGnh4/Kww= X-Received: by 2002:a63:b54c:0:b0:40c:7b84:4f7f with SMTP id u12-20020a63b54c000000b0040c7b844f7fmr5338841pgo.586.1656554601558; Wed, 29 Jun 2022 19:03:21 -0700 (PDT) MIME-Version: 1.0 References: <20220630015618.3586787-1-goldstein.w.n@gmail.com> In-Reply-To: <20220630015618.3586787-1-goldstein.w.n@gmail.com> From: "H.J. 
Lu" Date: Wed, 29 Jun 2022 19:02:45 -0700 Message-ID: Subject: Re: [PATCH v1] x86: Add missing IS_IN (libc) check to memmove-ssse3.S To: Noah Goldstein Cc: GNU C Library , "Carlos O'Donell" Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-3025.0 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 30 Jun 2022 02:03:24 -0000 On Wed, Jun 29, 2022 at 6:56 PM Noah Goldstein wrote: > > The IS_IN (libc) check was missing, so for the multiarch build > rtld-memmove-ssse3.os was being built and exporting symbols: > > >$ nm string/rtld-memmove-ssse3.os > U __GI___chk_fail > 0000000000000020 T __memcpy_chk_ssse3 > 0000000000000040 T __memcpy_ssse3 > 0000000000000020 T __memmove_chk_ssse3 > 0000000000000040 T __memmove_ssse3 > 0000000000000000 T __mempcpy_chk_ssse3 > 0000000000000010 T __mempcpy_ssse3 > U __x86_shared_cache_size_half > > Introduced after 2.35 in: > > commit 26b2478322db94edc9e0e8f577b2f71d291e5acb > Author: Noah Goldstein > Date: Thu Apr 14 11:47:40 2022 -0500 > > x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 > --- > sysdeps/x86_64/multiarch/memmove-ssse3.S | 60 +++++++++++++++++------- > 1 file changed, 44 insertions(+), 16 deletions(-) > > diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S > index 310ff62b86..a88fde4a8f 100644 > --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S > +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S > @@ -1,19 +1,42 @@ > -#include <sysdep.h> > - > -#ifndef MEMMOVE > -# define MEMMOVE __memmove_ssse3 > -# define MEMMOVE_CHK 
__memmove_chk_ssse3 > -# define MEMCPY __memcpy_ssse3 > -# define MEMCPY_CHK __memcpy_chk_ssse3 > -# define MEMPCPY __mempcpy_ssse3 > -# define MEMPCPY_CHK __mempcpy_chk_ssse3 > -#endif > +/* memmove/memcpy/mempcpy optimized for aligned access with SSSE3. > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > + > +#if IS_IN (libc) > + > +# include <sysdep.h> > +# ifndef MEMMOVE > +# define MEMMOVE __memmove_ssse3 > +# define MEMMOVE_CHK __memmove_chk_ssse3 > +# define MEMCPY __memcpy_ssse3 > +# define MEMCPY_CHK __memcpy_chk_ssse3 > +# define MEMPCPY __mempcpy_ssse3 > +# define MEMPCPY_CHK __mempcpy_chk_ssse3 > +# endif > > .section .text.ssse3, "ax", @progbits > +# if defined SHARED > ENTRY(MEMPCPY_CHK) > cmp %RDX_LP, %RCX_LP > jb HIDDEN_JUMPTARGET(__chk_fail) > END(MEMPCPY_CHK) > +# endif > > ENTRY(MEMPCPY) > mov %RDI_LP, %RAX_LP > @@ -21,10 +44,12 @@ ENTRY(MEMPCPY) > jmp L(start) > END(MEMPCPY) > > +# if defined SHARED > ENTRY(MEMMOVE_CHK) > cmp %RDX_LP, %RCX_LP > jb HIDDEN_JUMPTARGET(__chk_fail) > END(MEMMOVE_CHK) > +# endif > > ENTRY_P2ALIGN(MEMMOVE, 6) > # ifdef __ILP32__ > @@ -124,11 +149,11 @@ L(more_2x_vec): > loop. 
*/ > movups %xmm0, (%rdi) > > -#ifdef SHARED_CACHE_SIZE_HALF > +# ifdef SHARED_CACHE_SIZE_HALF > cmp $SHARED_CACHE_SIZE_HALF, %RDX_LP > -#else > +# else > cmp __x86_shared_cache_size_half(%rip), %rdx > -#endif > +# endif > ja L(large_memcpy) > > leaq -64(%rdi, %rdx), %r8 > @@ -206,7 +231,7 @@ L(end_loop_fwd): > > /* Extactly 64 bytes if `jmp L(end_loop_fwd)` is long encoding. > 60 bytes otherwise. */ > -#define ALIGNED_LOOP_FWD(align_by); \ > +# define ALIGNED_LOOP_FWD(align_by); \ > .p2align 6; \ > L(loop_fwd_ ## align_by): \ > movaps 16(%rsi), %xmm0; \ > @@ -275,7 +300,7 @@ L(end_large_loop_fwd): > > /* Size > 64 bytes and <= 96 bytes. 32-byte align between ensure > 96-byte spacing between each. */ > -#define ALIGNED_LARGE_LOOP_FWD(align_by); \ > +# define ALIGNED_LARGE_LOOP_FWD(align_by); \ > .p2align 5; \ > L(large_loop_fwd_ ## align_by): \ > movaps 16(%rsi), %xmm0; \ > @@ -343,7 +368,7 @@ L(end_loop_bkwd): > > /* Extactly 64 bytes if `jmp L(end_loop_bkwd)` is long encoding. > 60 bytes otherwise. */ > -#define ALIGNED_LOOP_BKWD(align_by); \ > +# define ALIGNED_LOOP_BKWD(align_by); \ > .p2align 6; \ > L(loop_bkwd_ ## align_by): \ > movaps 32(%rsi), %xmm1; \ > @@ -381,4 +406,7 @@ L(loop_bkwd_ ## align_by): \ > END(MEMMOVE) > > strong_alias (MEMMOVE, MEMCPY) > +# if defined SHARED > strong_alias (MEMMOVE_CHK, MEMCPY_CHK) > +# endif > +#endif > -- > 2.34.1 > LGTM. Thanks. -- H.J.