From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 3282 invoked by alias); 13 May 2013 22:47:25 -0000 Mailing-List: contact libc-ports-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: libc-ports-owner@sourceware.org Received: (qmail 3271 invoked by uid 89); 13 May 2013 22:47:25 -0000 X-Spam-SWARE-Status: No, score=-0.0 required=5.0 tests=AWL,BAYES_50,KAM_STOCKGEN,TW_CP,TW_FN,TW_HW,TW_VF autolearn=no version=3.3.1 Received: from toast.topped-with-meat.com (HELO topped-with-meat.com) (204.197.218.159) by sourceware.org (qpsmtpd/0.84/v0.84-167-ge50287c) with ESMTP; Mon, 13 May 2013 22:47:24 +0000 Received: by topped-with-meat.com (Postfix, from userid 5281) id 9F17C2C05E; Mon, 13 May 2013 15:47:22 -0700 (PDT) MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit From: Roland McGrath To: libc-ports@sourceware.org Subject: [PATCH roland/arm-memcpy] ARM: Make multiarch memcpy always use NEON when compiler does Message-Id: <20130513224722.9F17C2C05E@topped-with-meat.com> Date: Mon, 13 May 2013 22:47:00 -0000 X-CMAE-Score: 0 X-CMAE-Analysis: v=2.1 cv=LYSvtFvi c=1 sm=1 tr=0 a=WkljmVdYkabdwxfqvArNOQ==:117 a=14OXPxybAAAA:8 a=tbPnvLT72BkA:10 a=Z6MIti7PxpgA:10 a=kj9zAlcOel0A:10 a=hOe2yjtxAAAA:8 a=yfYBoU7Tba0A:10 a=CBcdSBlqRDw3jIzEHr0A:9 a=CjuIK1q_8ugA:10 X-SW-Source: 2013-05/txt/msg00050.txt.bz2 When the compiler is emitting NEON instructions anyway, there is no point in using IFUNC when we can just use the NEON memcpy unconditionally. Tested on armv7l-linux-gnueabihf with CC='gcc -mfpu=neon', no check-abi failures, no regressions in 'make check subdirs=string'. Thanks, Roland ports/ChangeLog.arm 2013-05-13 Roland McGrath * sysdeps/arm/armv7/multiarch/memcpy.S [__ARM_NEON__]: Don't define memcpy here, just __memcpy_arm and __aeabi_memcpy*. * sysdeps/arm/armv7/multiarch/memcpy_neon.S [__ARM_NEON__]: Define memcpy here, not __memcpy_neon. 
* sysdeps/arm/armv7/multiarch/memcpy_vfp.S [__ARM_NEON__]: Define nothing here. * sysdeps/arm/armv7/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list) [__ARM_NEON__]: Don't list __memcpy_vfp; use memcpy name for NEON implementation. --- a/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c +++ b/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c @@ -35,9 +35,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, memcpy, IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_NEON, - __memcpy_neon) +#ifdef __ARM_NEON__ + memcpy +#else + __memcpy_neon +#endif + ) +#ifndef __ARM_NEON__ IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_VFP, __memcpy_vfp) +#endif IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm)); return i; --- a/ports/sysdeps/arm/armv7/multiarch/memcpy.S +++ b/ports/sysdeps/arm/armv7/multiarch/memcpy.S @@ -22,27 +22,29 @@ #include #include -#if !defined NOT_IN_libc +#ifndef NOT_IN_libc +/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. */ +# ifndef __ARM_NEON__ .text ENTRY(memcpy) .type memcpy, %gnu_indirect_function -#ifdef __SOFTFP__ +# ifdef __SOFTFP__ ldr r1, .Lmemcpy_arm tst r0, #HWCAP_ARM_VFP ldrne r1, .Lmemcpy_vfp -#else +# else ldr r1, .Lmemcpy_vfp -#endif +# endif tst r0, #HWCAP_ARM_NEON ldrne r1, .Lmemcpy_neon 1: add r0, r1, pc DO_RET(lr) -#ifdef __SOFTFP__ +# ifdef __SOFTFP__ .Lmemcpy_arm: .long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS -#endif +# endif .Lmemcpy_neon: .long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS .Lmemcpy_vfp: @@ -51,6 +53,7 @@ ENTRY(memcpy) END(memcpy) libc_hidden_builtin_def (memcpy) +#endif /* Not __ARM_NEON__. */ /* These versions of memcpy are defined not to clobber any VFP or NEON registers so they must always call the ARM variant of the memcpy code. */ --- a/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S +++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S @@ -1,3 +1,9 @@ +#ifdef __ARM_NEON__ +/* Under __ARM_NEON__, this file defines memcpy directly. 
 */
+libc_hidden_builtin_def (memcpy)
+#else
+# define memcpy __memcpy_neon
+#endif
+
 #define MEMCPY_NEON
-#define memcpy __memcpy_neon
 #include "memcpy_impl.S"
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
@@ -1,3 +1,7 @@
-#define MEMCPY_VFP
-#define memcpy __memcpy_vfp
-#include "memcpy_impl.S"
+/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly
+ and the __memcpy_vfp code will never be used. */
+#ifndef __ARM_NEON__
+# define MEMCPY_VFP
+# define memcpy __memcpy_vfp
+# include "memcpy_impl.S"
+#endif