From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oo1-xc33.google.com (mail-oo1-xc33.google.com [IPv6:2607:f8b0:4864:20::c33]) by sourceware.org (Postfix) with ESMTPS id D6D55394881A for ; Wed, 23 Feb 2022 14:09:43 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org D6D55394881A Received: by mail-oo1-xc33.google.com with SMTP id w10-20020a4ae08a000000b0031bdf7a6d76so22322081oos.10 for ; Wed, 23 Feb 2022 06:09:43 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=H5EsMQeOltNOy6JkihNotVFTgekkfOUZOAlVxQGNgLE=; b=Nmcz7P6brOi44f3VZeOGTlYNpv6KwRxABrYUdDXN1v2KpIXrf4Bh0DkJN3gO4hu3sE /Bl7vkU/pCBz8jvBArhfSOk1wobmcMu6sR1JjHfcPo0DLWtDTvtMAeU4b8G41xl5HD+6 RO3v1NKbuH5a7uiQJfNi8+QRs+Ok1sCZRe/m2vtoZR4HwrGhn2gAbwCsgxQakza/Vr3O XVeYjbMn5cm8rTLtjihCQtbPwrBdU2yOLGkOWxLNf46m/hQZhJnR7w53EDCGPrdFF/GE wSWmaS05H+mpBCD6kQhHvHR+zobjHefuHqh+KwoHvEiEI/LxIVxqQE1qLHOx+F0qbME2 EBGg== X-Gm-Message-State: AOAM530dXSoFwigfv8kvJKyaG4vqA+Elj4CzGWmQQXPcdfdYjMJ4oQHR 8Y2+O9V+l3Xm/2Bq1GBHerA8njlfNIUNeQ== X-Google-Smtp-Source: ABdhPJwXgQN/S7h2eaTxLdutT48rQZQOZ9lrpyplvAlUdvqWuRDSLbfUy+cEKAqxjI95Yh2F/f44qA== X-Received: by 2002:a05:6870:6603:b0:d3:6ffa:5d35 with SMTP id gf3-20020a056870660300b000d36ffa5d35mr3762097oab.319.1645625382607; Wed, 23 Feb 2022 06:09:42 -0800 (PST) Received: from birita.. 
([2804:431:c7ca:cb36:52bd:55cf:8e44:571]) by smtp.gmail.com with ESMTPSA id o22sm8801734otp.21.2022.02.23.06.09.41 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 23 Feb 2022 06:09:42 -0800 (PST) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH v2 11/11] i686: Remove bzero optimizations Date: Wed, 23 Feb 2022 11:09:21 -0300 Message-Id: <20220223140921.2768062-12-adhemerval.zanella@linaro.org> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20220223140921.2768062-1-adhemerval.zanella@linaro.org> References: <20220223140921.2768062-1-adhemerval.zanella@linaro.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_SHORT, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 23 Feb 2022 14:09:46 -0000 The symbol is not present in the current POSIX specification, and the compiler already generates a memset call. 
--- sysdeps/i386/bzero.S | 5 --- sysdeps/i386/i586/bzero.S | 4 -- sysdeps/i386/i586/memset.S | 16 ++------ sysdeps/i386/i686/bzero.S | 4 -- sysdeps/i386/i686/memset.S | 23 +++--------- sysdeps/i386/i686/multiarch/Makefile | 6 +-- sysdeps/i386/i686/multiarch/bzero-ia32.S | 37 ------------------- sysdeps/i386/i686/multiarch/bzero-sse2-rep.S | 3 -- sysdeps/i386/i686/multiarch/bzero-sse2.S | 3 -- sysdeps/i386/i686/multiarch/bzero.c | 32 ---------------- sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 8 ---- sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 24 +++--------- sysdeps/i386/i686/multiarch/memset-sse2.S | 24 +++--------- sysdeps/i386/memset.S | 14 +------ 14 files changed, 22 insertions(+), 181 deletions(-) delete mode 100644 sysdeps/i386/bzero.S delete mode 100644 sysdeps/i386/i586/bzero.S delete mode 100644 sysdeps/i386/i686/bzero.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-ia32.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-sse2-rep.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero.c diff --git a/sysdeps/i386/bzero.S b/sysdeps/i386/bzero.S deleted file mode 100644 index c8dd47b4da..0000000000 --- a/sysdeps/i386/bzero.S +++ /dev/null @@ -1,5 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include "memset.S" - -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i586/bzero.S b/sysdeps/i386/i586/bzero.S deleted file mode 100644 index 2a106719a4..0000000000 --- a/sysdeps/i386/i586/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S index ae09c3b40a..672af41398 100644 --- a/sysdeps/i386/i586/memset.S +++ b/sysdeps/i386/i586/memset.S @@ -23,15 +23,11 @@ #define PARMS 4+4 /* space for 1 saved reg */ #define RTN PARMS #define DEST RTN -#ifdef USE_AS_BZERO -# define LEN DEST+4 -#else -# define CHR DEST+4 -# define LEN CHR+4 -#endif 
+#define CHR DEST+4 +#define LEN CHR+4 .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -46,15 +42,11 @@ ENTRY (memset) movl DEST(%esp), %edi cfi_rel_offset (edi, 0) movl LEN(%esp), %edx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* we fill with 0 */ -#else movb CHR(%esp), %al movb %al, %ah movl %eax, %ecx shll $16, %eax movw %cx, %ax -#endif cld /* If less than 36 bytes to write, skip tricky code (it wouldn't work). */ @@ -100,10 +92,8 @@ L(2): shrl $2, %ecx /* convert byte count to longword count */ rep stosb -#ifndef USE_AS_BZERO /* Load result (only if used as memset). */ movl DEST(%esp), %eax /* start address of destination is result */ -#endif popl %edi cfi_adjust_cfa_offset (-4) cfi_restore (edi) diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S deleted file mode 100644 index c7898f18e0..0000000000 --- a/sysdeps/i386/i686/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index fd5b26aeae..3cb86c016d 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -21,18 +21,13 @@ #include "asm-syntax.h" #define PARMS 4+4 /* space for 1 saved reg */ -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -#else -# define RTN PARMS -# define DEST RTN -# define CHR DEST+4 -# define LEN CHR+4 -#endif +#define RTN PARMS +#define DEST RTN +#define CHR DEST+4 +#define LEN CHR+4 .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -46,11 +41,7 @@ ENTRY (memset) cfi_adjust_cfa_offset (4) movl DEST(%esp), %edx movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* fill with 0 */ -#else movzbl CHR(%esp), %eax -#endif jecxz 1f movl %edx, %edi cfi_rel_offset 
(edi, 0) @@ -70,9 +61,7 @@ ENTRY (memset) 2: movl %ecx, %edx shrl $2, %ecx andl $3, %edx -#ifndef USE_AS_BZERO imul $0x01010101, %eax -#endif rep stosl movl %edx, %ecx @@ -80,9 +69,7 @@ ENTRY (memset) stosb 1: -#ifndef USE_AS_BZERO movl DEST(%esp), %eax /* start address of destination is result */ -#endif popl %edi cfi_adjust_cfa_offset (-4) cfi_restore (edi) diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 02fa02658e..9fe5ea8639 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,9 +1,9 @@ ifeq ($(subdir),string) gen-as-const-headers += locale-defines.sym -sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ +sysdep_routines += memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ memmove-ssse3-rep \ - memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ + memset-sse2-rep strcmp-ssse3 \ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ memcmp-ssse3 memcmp-sse4 varshift \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ @@ -21,7 +21,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memcpy-sse2-unaligned \ mempcpy-sse2-unaligned memmove-sse2-unaligned \ strcspn-c strpbrk-c strspn-c \ - bzero-ia32 rawmemchr-ia32 \ + rawmemchr-ia32 \ memchr-ia32 memcmp-ia32 memcpy-ia32 memmove-ia32 \ mempcpy-ia32 memset-ia32 strcat-ia32 strchr-ia32 \ strrchr-ia32 strcpy-ia32 strcmp-ia32 strcspn-ia32 \ diff --git a/sysdeps/i386/i686/multiarch/bzero-ia32.S b/sysdeps/i386/i686/multiarch/bzero-ia32.S deleted file mode 100644 index 96afe9bad1..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-ia32.S +++ /dev/null @@ -1,37 +0,0 @@ -/* bzero optimized for i686. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include - -#if IS_IN (libc) -# define __bzero __bzero_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI___bzero; __GI___bzero = __bzero -# endif - -# undef weak_alias -# define weak_alias(original, alias) - -# include -#endif diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S deleted file mode 100644 index 507b288bb3..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2_rep __bzero_sse2_rep -#include "memset-sse2-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S deleted file mode 100644 index 8d04512e4e..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2 __bzero_sse2 -#include "memset-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/bzero.c b/sysdeps/i386/i686/multiarch/bzero.c deleted file mode 100644 index 7fd0ddd576..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Multiple versions of bzero. 
- All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) -# define bzero __redirect_bzero -# include -# undef bzero - -# define SYMBOL_NAME bzero -# include "ifunc-memset.h" - -libc_ifunc_redirected (__redirect_bzero, __bzero, IFUNC_SELECTOR ()); - -weak_alias (__bzero, bzero) -#endif diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index 5c7a42dc97..c014f52bf9 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -36,14 +36,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, size_t i = 0; - /* Support sysdeps/i386/i686/multiarch/bzero.S. */ - IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), - __bzero_sse2_rep) - IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), - __bzero_sse2) - IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) - /* Support sysdeps/i386/i686/multiarch/memchr.S. 
*/ IFUNC_IMPL (i, name, memchr, IFUNC_IMPL_ADD (array, i, memchr, CPU_FEATURE_USABLE (SSE2), diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S index 37a10575e7..28df7836e0 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S @@ -32,16 +32,10 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif +#define DEST PARMS +#define CHR DEST+4 +#define LEN CHR+4 +#define SETRTNVAL movl DEST(%esp), %eax #ifdef PIC # define ENTRANCE PUSH (%ebx); @@ -78,7 +72,7 @@ #endif .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk_sse2_rep) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -89,16 +83,12 @@ ENTRY (__memset_sse2_rep) ENTRANCE movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl CHR(%esp), %eax movb %al, %ah /* Fill the whole EAX with pattern. */ movl %eax, %edx shl $16, %eax or %edx, %eax -#endif movl DEST(%esp), %edx cmp $32, %ecx jae L(32bytesormore) @@ -228,12 +218,8 @@ L(write_3bytes): /* ECX > 32 and EDX is 4 byte aligned. */ L(32bytesormore): /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else movd %eax, %xmm0 pshufd $0, %xmm0, %xmm0 -#endif testl $0xf, %edx jz L(aligned_16) /* ECX > 32 and EDX is not 16 byte aligned. 
*/ diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S index 455519c7ac..4e8414fd51 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2.S @@ -32,16 +32,10 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif +#define DEST PARMS +#define CHR DEST+4 +#define LEN CHR+4 +#define SETRTNVAL movl DEST(%esp), %eax #ifdef PIC # define ENTRANCE PUSH (%ebx); @@ -78,7 +72,7 @@ #endif .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk_sse2) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -89,16 +83,12 @@ ENTRY (__memset_sse2) ENTRANCE movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl CHR(%esp), %eax movb %al, %ah /* Fill the whole EAX with pattern. */ movl %eax, %edx shl $16, %eax or %edx, %eax -#endif movl DEST(%esp), %edx cmp $32, %ecx jae L(32bytesormore) @@ -228,12 +218,8 @@ L(write_3bytes): /* ECX > 32 and EDX is 4 byte aligned. */ L(32bytesormore): /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else movd %eax, %xmm0 pshufd $0, %xmm0, %xmm0 -#endif testl $0xf, %edx jz L(aligned_16) /* ECX > 32 and EDX is not 16 byte aligned. 
*/ diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S index f470511b64..db2753eb2f 100644 --- a/sysdeps/i386/memset.S +++ b/sysdeps/i386/memset.S @@ -30,15 +30,11 @@ #define POP(REG) popl REG; CFI_POP (REG) #define STR1 8 -#ifdef USE_AS_BZERO -#define N STR1+4 -#else #define STR2 STR1+4 #define N STR2+4 -#endif .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -49,20 +45,12 @@ ENTRY (memset) PUSH (%edi) movl N(%esp), %ecx movl STR1(%esp), %edi -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl STR2(%esp), %eax mov %edi, %edx -#endif rep stosb -#ifndef USE_AS_BZERO mov %edx, %eax -#endif POP (%edi) ret END (memset) -#ifndef USE_AS_BZERO libc_hidden_builtin_def (memset) -#endif -- 2.32.0