public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "Pawar, Amit" <Amit.Pawar@amd.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: "libc-alpha@sourceware.org" <libc-alpha@sourceware.org>
Subject: RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
Date: Wed, 23 Mar 2016 10:12:00 -0000	[thread overview]
Message-ID: <SN1PR12MB07339536A92E3475A46C113B97810@SN1PR12MB0733.namprd12.prod.outlook.com> (raw)
In-Reply-To: <CAMe9rOqTJA-LHeJEck82_3g-ezD2q-BB-jHs1puFLaugPWu=zA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 220 bytes --]

> Then we should add Fast_Unaligned_Copy and only use it in memcpy.
PFA patch and ChangeLog files containing the fix for the memcpy IFUNC function. Is it OK, or could you please suggest any required changes?

Thanks,
Amit Pawar

[-- Attachment #2: 0001-x86_64-Fix-memcpy-IFUNC-selection-order-for-Excavato.patch --]
[-- Type: application/octet-stream, Size: 4108 bytes --]

From 77b89b605ed498e6ab32132e97b0efb8088fd4a6 Mon Sep 17 00:00:00 2001
From: Amit Pawar <Amit.Pawar@amd.com>
Date: Wed, 23 Mar 2016 15:35:27 +0530
Subject: [PATCH] x86_64 Fix memcpy IFUNC selection order for Excavator CPU.

Performance of the memcpy implementation based on Fast_Copy_Backward is better
compared to the currently selected Fast_Unaligned_Load based implementation on
the Excavator CPU. A new feature bit is required to fix this issue in the
memcpy IFUNC function without affecting other targets. So two new feature bit
macros, bit_arch_Fast_Unaligned_Copy and index_arch_Fast_Unaligned_Copy, are
defined, and the selection order of these functions is updated.

	[BZ #19583]
        * sysdeps/x86/cpu-features.h (bit_arch_Fast_Unaligned_Copy):
        New.
        (index_arch_Fast_Unaligned_Copy): Likewise.
        * sysdeps/x86/cpu-features.c
        (init_cpu_features, Fast_Copy_Backward): Set it for Excavator core.
        (init_cpu_features, Fast_Unaligned_Copy): Set it for Excavator core.
        * sysdeps/x86_64/multiarch/memcpy.S
        (__new_memcpy, Fast_Unaligned_Copy): Add check for
        Fast_Unaligned_Copy bit and select it on Excavator core.
---
 sysdeps/x86/cpu-features.c        | 11 ++++++++++-
 sysdeps/x86/cpu-features.h        |  3 +++
 sysdeps/x86_64/multiarch/memcpy.S | 12 +++++++-----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c8f81ef..7701548 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -220,10 +220,19 @@ init_cpu_features (struct cpu_features *cpu_features)
 
       if (family == 0x15)
 	{
+#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
+# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
+#endif
+#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+#endif
 	  /* "Excavator"   */
 	  if (model >= 0x60 && model <= 0x7f)
 	    cpu_features->feature[index_arch_Fast_Unaligned_Load]
-	      |= bit_arch_Fast_Unaligned_Load;
+	      |= (bit_arch_Fast_Unaligned_Load
+		  | bit_arch_Fast_Unaligned_Copy
+		  | bit_arch_Fast_Copy_Backward);
+
 	}
     }
   else
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index e06eb7e..bfe1f4c 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -35,6 +35,7 @@
 #define bit_arch_I686				(1 << 15)
 #define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
 #define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
+#define bit_arch_Fast_Unaligned_Copy		(1 << 18)
 
 /* CPUID Feature flags.  */
 
@@ -101,6 +102,7 @@
 # define index_arch_I686		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -265,6 +267,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_I686		FEATURE_INDEX_1
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
+# define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8882590..9b37626 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -40,18 +40,20 @@ ENTRY(__new_memcpy)
 #endif
 1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+	jnz	3f
+	HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
 	jnz	2f
 	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	lea	__memcpy_sse2(%rip), %RAX_LP
+	jnz	3f
+2:	lea	__memcpy_sse2(%rip), %RAX_LP
 	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
+	jz	3f
 	lea    __memcpy_ssse3_back(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jnz	2f
+	jnz	3f
 	lea	__memcpy_ssse3(%rip), %RAX_LP
-2:	ret
+3:	ret
 END(__new_memcpy)
 
 # undef ENTRY
-- 
2.1.4


[-- Attachment #3: ChangeLog --]
[-- Type: application/octet-stream, Size: 489 bytes --]

2016-03-23  Amit Pawar  <Amit.Pawar@amd.com>

	[BZ #19583]
	* sysdeps/x86/cpu-features.h (bit_arch_Fast_Unaligned_Copy): 
	New.
	(index_arch_Fast_Unaligned_Copy): Likewise.
	* sysdeps/x86/cpu-features.c
	(init_cpu_features, Fast_Copy_Backward): Set it for Excavator core.
	(init_cpu_features, Fast_Unaligned_Copy): Set it for Excavator core.
	* sysdeps/x86_64/multiarch/memcpy.S 
	(__new_memcpy, Fast_Unaligned_Copy): Add check for
	Fast_Unaligned_Copy bit and select it on Excavator core.

  reply	other threads:[~2016-03-23 10:12 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-17 10:52 Pawar, Amit
2016-03-17 11:53 ` H.J. Lu
2016-03-17 14:16   ` Pawar, Amit
2016-03-17 14:46     ` H.J. Lu
2016-03-18 11:43       ` Pawar, Amit
2016-03-18 11:51         ` H.J. Lu
2016-03-18 12:25           ` Pawar, Amit
2016-03-18 12:34             ` H.J. Lu
2016-03-18 13:22               ` Pawar, Amit
2016-03-18 13:51                 ` H.J. Lu
2016-03-18 13:55                   ` Adhemerval Zanella
2016-03-18 14:43                     ` H.J. Lu
2016-03-18 14:45                   ` H.J. Lu
2016-03-18 15:19                     ` Pawar, Amit
2016-03-18 15:24                       ` H.J. Lu
2016-03-22 11:08                         ` Pawar, Amit
2016-03-22 14:50                           ` H.J. Lu
2016-03-22 14:57                             ` Pawar, Amit
2016-03-22 15:03                               ` H.J. Lu
2016-03-23 10:12                                 ` Pawar, Amit [this message]
2016-03-23 17:59                                   ` H.J. Lu
2016-03-28  7:43                                     ` Pawar, Amit
2016-03-28 12:12                                       ` H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SN1PR12MB07339536A92E3475A46C113B97810@SN1PR12MB0733.namprd12.prod.outlook.com \
    --to=amit.pawar@amd.com \
    --cc=hjl.tools@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).