public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] x86: Align entry for memrchr to 64-bytes.
@ 2022-06-24 16:42 Noah Goldstein
  2022-06-24 16:42 ` [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:42 UTC (permalink / raw)
  To: libc-alpha

The function was tuned around 64-byte entry alignment and performs
better for all sizes with it.

As well, different code paths were explicitly written to touch the
minimum number of cache lines, i.e. sizes <= 32 touch only the entry
cache line.
---
 sysdeps/x86_64/multiarch/memrchr-avx2.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
index 9c83c76d3c..f300d7daf4 100644
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -35,7 +35,7 @@
 # define VEC_SIZE			32
 # define PAGE_SIZE			4096
 	.section SECTION(.text), "ax", @progbits
-ENTRY(MEMRCHR)
+ENTRY_P2ALIGN(MEMRCHR, 6)
 # ifdef __ILP32__
 	/* Clear upper bits.  */
 	and	%RDX_LP, %RDX_LP
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c
  2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
@ 2022-06-24 16:42 ` Noah Goldstein
  2022-06-24 17:06   ` H.J. Lu
  2022-06-24 16:42 ` [PATCH v2] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:42 UTC (permalink / raw)
  To: libc-alpha

This is in accordance with other files in the multiarch directory.
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c       | 2 +-
 sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S | 2 +-
 sysdeps/x86_64/multiarch/strstr.c                | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index bf52cf96d0..0d28319905 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -627,7 +627,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                                && CPU_FEATURE_USABLE (BMI2)),
                               __strstr_avx512)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
+	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
   IFUNC_IMPL (i, name, wcschr,
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
index 3d12ffdf1e..c6aa8f45a6 100644
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
@@ -267,7 +267,7 @@ L(next_pair3):
 	.p2align 4
 L(switch_strstr):
 	movq	%rdi, %rdi
-	jmp	__strstr_sse2
+	jmp	__strstr_generic
 
 	.p2align 4
 L(cross_page):
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 2fb8b169b6..2b83199245 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -24,17 +24,17 @@
 #include <string.h>
 #undef  strstr
 
-#define STRSTR __strstr_sse2
+#define STRSTR __strstr_generic
 #ifdef SHARED
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(name) \
-  __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
+  __hidden_ver1 (__strstr_generic, __GI_strstr, __strstr_generic);
 #endif
 
 #include "string/strstr.c"
 
 extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
-extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
+extern __typeof (__redirect_strstr) __strstr_generic attribute_hidden;
 extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden;
 
 #include "init-arch.h"
@@ -58,7 +58,7 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return __strstr_sse2_unaligned;
 
-  return __strstr_sse2;
+  return __strstr_generic;
 }
 
 libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ());
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] x86: Remove unused file wmemcmp-sse4
  2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
  2022-06-24 16:42 ` [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
@ 2022-06-24 16:42 ` Noah Goldstein
  2022-06-24 17:03   ` H.J. Lu
  2022-06-24 16:42 ` [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:42 UTC (permalink / raw)
  To: libc-alpha

The memcmp-sse4 was removed in:

commit 7cbc03d03091d5664060924789afe46d30a5477e
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Fri Apr 15 12:28:00 2022 -0500

    x86: Remove memcmp-sse4.S

so this file does nothing.
---
 sysdeps/x86_64/multiarch/wmemcmp-sse4.S | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 sysdeps/x86_64/multiarch/wmemcmp-sse4.S

diff --git a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S b/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
deleted file mode 100644
index b07973a4f6..0000000000
--- a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1
-#define MEMCMP __wmemcmp_sse4_1
-
-#include "memcmp-sse4.S"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section
  2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
  2022-06-24 16:42 ` [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
  2022-06-24 16:42 ` [PATCH v2] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
@ 2022-06-24 16:42 ` Noah Goldstein
  2022-06-24 17:05   ` H.J. Lu
  2022-06-24 16:42 ` [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
  2022-06-24 17:15 ` [PATCH v2] x86: Align entry for memrchr to 64-bytes H.J. Lu
  4 siblings, 1 reply; 12+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:42 UTC (permalink / raw)
  To: libc-alpha

The SECTION definition was previously missing, but the two
implementations shouldn't go in the sse2 (generic) text section.
---
 sysdeps/x86_64/multiarch/strlen-vec.S     | 6 +++++-
 sysdeps/x86_64/multiarch/wcslen-sse4_1.S  | 1 +
 sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
index 42b6124dfd..874123d604 100644
--- a/sysdeps/x86_64/multiarch/strlen-vec.S
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
@@ -28,6 +28,10 @@
 # define SHIFT_RETURN
 #endif
 
+#ifndef SECTION
+# define SECTION(p)	p
+#endif
+
 /* Long lived register in strlen(s), strnlen(s, n) are:
 
 	%xmm3 - zero
@@ -37,7 +41,7 @@
 */
 
 
-.text
+	.section SECTION(.text),"ax",@progbits
 ENTRY(strlen)
 
 /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
index 7e62621afc..e306a77f51 100644
--- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
+++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
@@ -1,4 +1,5 @@
 #define AS_WCSLEN
 #define strlen	__wcslen_sse4_1
+#define SECTION(p)	p##.sse4.1
 
 #include "strlen-vec.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
index 5fa51fe07c..d2f7dd6e22 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
@@ -1,5 +1,6 @@
 #define AS_WCSLEN
 #define AS_STRNLEN
 #define strlen	__wcsnlen_sse4_1
+#define SECTION(p)	p##.sse4.1
 
 #include "strlen-vec.S"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1
  2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
                   ` (2 preceding siblings ...)
  2022-06-24 16:42 ` [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
@ 2022-06-24 16:42 ` Noah Goldstein
  2022-06-24 17:02   ` H.J. Lu
  2022-06-24 17:15 ` [PATCH v2] x86: Align entry for memrchr to 64-bytes H.J. Lu
  4 siblings, 1 reply; 12+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:42 UTC (permalink / raw)
  To: libc-alpha

This is just a quality of life change to make it easier to see how the
ISA level will affect a given build.
---
 sysdeps/x86/isa-level.h | 67 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index e1a30ed83e..f14ae5cc00 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,8 +64,71 @@
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
+/* ISA levels for known GCC targets as of GCC12.1:
+ *
+ * amdfam10       -> 1
+ * athlon-fx      -> 1
+ * athlon64       -> 1
+ * athlon64-sse3  -> 1
+ * atom           -> 1
+ * barcelona      -> 1
+ * bonnell        -> 1
+ * btver1         -> 1
+ * core2          -> 1
+ * eden-x2        -> 1
+ * eden-x4        -> 1
+ * k8             -> 1
+ * k8-sse3        -> 1
+ * nano           -> 1
+ * nano-1000      -> 1
+ * nano-2000      -> 1
+ * nano-3000      -> 1
+ * nano-x2        -> 1
+ * nano-x4        -> 1
+ * nocona         -> 1
+ * opteron        -> 1
+ * opteron-sse3   -> 1
+ * x86-64         -> 1
+ * bdver1         -> 2
+ * bdver2         -> 2
+ * bdver3         -> 2
+ * btver2         -> 2
+ * core-avx-i     -> 2
+ * corei7         -> 2
+ * corei7-avx     -> 2
+ * goldmont       -> 2
+ * goldmont-plus  -> 2
+ * ivybridge      -> 2
+ * nehalem        -> 2
+ * sandybridge    -> 2
+ * silvermont     -> 2
+ * slm            -> 2
+ * tremont        -> 2
+ * westmere       -> 2
+ * x86-64-v2      -> 2
+ * alderlake      -> 3
+ * bdver4         -> 3
+ * broadwell      -> 3
+ * core-avx2      -> 3
+ * haswell        -> 3
+ * knl            -> 3
+ * knm            -> 3
+ * skylake        -> 3
+ * x86-64-v3      -> 3
+ * znver1         -> 3
+ * znver2         -> 3
+ * znver3         -> 3
+ * cannonlake     -> 4
+ * cascadelake    -> 4
+ * cooperlake     -> 4
+ * icelake-client -> 4
+ * icelake-server -> 4
+ * rocketlake     -> 4
+ * sapphirerapids -> 4
+ * skylake-avx512 -> 4
+ * tigerlake      -> 4
+ * x86-64-v4      -> 4
+ *
  * CPU Features that are hard coded as enabled/disabled depending on
  * ISA build level.
  *    - Values > 0 features are always ENABLED if:
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1
  2022-06-24 16:42 ` [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
@ 2022-06-24 17:02   ` H.J. Lu
  0 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2022-06-24 17:02 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This is just a quality of life change to make it easier to see how the
> ISA level will effect a given build.
> ---
>  sysdeps/x86/isa-level.h | 67 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 65 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index e1a30ed83e..f14ae5cc00 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -64,8 +64,71 @@
>  #define MINIMUM_X86_ISA_LEVEL                                                 \
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
> -
> -/*
> +/* ISA levels for known GCC targets as of GCC12.1:
> + *
> + * amdfam10       -> 1
> + * athlon-fx      -> 1
> + * athlon64       -> 1
> + * athlon64-sse3  -> 1
> + * atom           -> 1
> + * barcelona      -> 1
> + * bonnell        -> 1
> + * btver1         -> 1
> + * core2          -> 1
> + * eden-x2        -> 1
> + * eden-x4        -> 1
> + * k8             -> 1
> + * k8-sse3        -> 1
> + * nano           -> 1
> + * nano-1000      -> 1
> + * nano-2000      -> 1
> + * nano-3000      -> 1
> + * nano-x2        -> 1
> + * nano-x4        -> 1
> + * nocona         -> 1
> + * opteron        -> 1
> + * opteron-sse3   -> 1
> + * x86-64         -> 1
> + * bdver1         -> 2
> + * bdver2         -> 2
> + * bdver3         -> 2
> + * btver2         -> 2
> + * core-avx-i     -> 2
> + * corei7         -> 2
> + * corei7-avx     -> 2
> + * goldmont       -> 2
> + * goldmont-plus  -> 2
> + * ivybridge      -> 2
> + * nehalem        -> 2
> + * sandybridge    -> 2
> + * silvermont     -> 2
> + * slm            -> 2
> + * tremont        -> 2
> + * westmere       -> 2
> + * x86-64-v2      -> 2
> + * alderlake      -> 3
> + * bdver4         -> 3
> + * broadwell      -> 3
> + * core-avx2      -> 3
> + * haswell        -> 3
> + * knl            -> 3
> + * knm            -> 3
> + * skylake        -> 3
> + * x86-64-v3      -> 3
> + * znver1         -> 3
> + * znver2         -> 3
> + * znver3         -> 3
> + * cannonlake     -> 4
> + * cascadelake    -> 4
> + * cooperlake     -> 4
> + * icelake-client -> 4
> + * icelake-server -> 4
> + * rocketlake     -> 4
> + * sapphirerapids -> 4
> + * skylake-avx512 -> 4
> + * tigerlake      -> 4
> + * x86-64-v4      -> 4
> + *
>   * CPU Features that are hard coded as enabled/disabled depending on
>   * ISA build level.
>   *    - Values > 0 features are always ENABLED if:
> --
> 2.34.1
>

I don't think it is needed.

-- 
H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Remove unused file wmemcmp-sse4
  2022-06-24 16:42 ` [PATCH v2] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
@ 2022-06-24 17:03   ` H.J. Lu
  0 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2022-06-24 17:03 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The memcmp-sse4 was removed in:
>
> commit 7cbc03d03091d5664060924789afe46d30a5477e
> Author: Noah Goldstein <goldstein.w.n@gmail.com>
> Date:   Fri Apr 15 12:28:00 2022 -0500
>
>     x86: Remove memcmp-sse4.S
>
> so this file does nothing.
> ---
>  sysdeps/x86_64/multiarch/wmemcmp-sse4.S | 4 ----
>  1 file changed, 4 deletions(-)
>  delete mode 100644 sysdeps/x86_64/multiarch/wmemcmp-sse4.S
>
> diff --git a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S b/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
> deleted file mode 100644
> index b07973a4f6..0000000000
> --- a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#define USE_AS_WMEMCMP 1
> -#define MEMCMP __wmemcmp_sse4_1
> -
> -#include "memcmp-sse4.S"
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section
  2022-06-24 16:42 ` [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
@ 2022-06-24 17:05   ` H.J. Lu
  2022-07-14  3:02     ` Sunil Pandey
  0 siblings, 1 reply; 12+ messages in thread
From: H.J. Lu @ 2022-06-24 17:05 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Previously was missing but the two implementations shouldn't get in
> the sse2 (generic) text section.
> ---
>  sysdeps/x86_64/multiarch/strlen-vec.S     | 6 +++++-
>  sysdeps/x86_64/multiarch/wcslen-sse4_1.S  | 1 +
>  sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 1 +
>  3 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
> index 42b6124dfd..874123d604 100644
> --- a/sysdeps/x86_64/multiarch/strlen-vec.S
> +++ b/sysdeps/x86_64/multiarch/strlen-vec.S
> @@ -28,6 +28,10 @@
>  # define SHIFT_RETURN
>  #endif
>
> +#ifndef SECTION
> +# define SECTION(p)    p
> +#endif
> +
>  /* Long lived register in strlen(s), strnlen(s, n) are:
>
>         %xmm3 - zero
> @@ -37,7 +41,7 @@
>  */
>
>
> -.text
> +       .section SECTION(.text),"ax",@progbits
>  ENTRY(strlen)
>
>  /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
> diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> index 7e62621afc..e306a77f51 100644
> --- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> @@ -1,4 +1,5 @@
>  #define AS_WCSLEN
>  #define strlen __wcslen_sse4_1
> +#define SECTION(p)     p##.sse4.1
>
>  #include "strlen-vec.S"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> index 5fa51fe07c..d2f7dd6e22 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> @@ -1,5 +1,6 @@
>  #define AS_WCSLEN
>  #define AS_STRNLEN
>  #define strlen __wcsnlen_sse4_1
> +#define SECTION(p)     p##.sse4.1
>
>  #include "strlen-vec.S"
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c
  2022-06-24 16:42 ` [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
@ 2022-06-24 17:06   ` H.J. Lu
  0 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2022-06-24 17:06 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This is in accordance with other files in the multiarch directory.
> ---
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c       | 2 +-
>  sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S | 2 +-
>  sysdeps/x86_64/multiarch/strstr.c                | 8 ++++----
>  3 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index bf52cf96d0..0d28319905 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -627,7 +627,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                                 && CPU_FEATURE_USABLE (BMI2)),
>                                __strstr_avx512)
>               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
> +             IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
>    IFUNC_IMPL (i, name, wcschr,
> diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
> index 3d12ffdf1e..c6aa8f45a6 100644
> --- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
> @@ -267,7 +267,7 @@ L(next_pair3):
>         .p2align 4
>  L(switch_strstr):
>         movq    %rdi, %rdi
> -       jmp     __strstr_sse2
> +       jmp     __strstr_generic
>
>         .p2align 4
>  L(cross_page):
> diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> index 2fb8b169b6..2b83199245 100644
> --- a/sysdeps/x86_64/multiarch/strstr.c
> +++ b/sysdeps/x86_64/multiarch/strstr.c
> @@ -24,17 +24,17 @@
>  #include <string.h>
>  #undef  strstr
>
> -#define STRSTR __strstr_sse2
> +#define STRSTR __strstr_generic
>  #ifdef SHARED
>  # undef libc_hidden_builtin_def
>  # define libc_hidden_builtin_def(name) \
> -  __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
> +  __hidden_ver1 (__strstr_generic, __GI_strstr, __strstr_generic);
>  #endif
>
>  #include "string/strstr.c"
>
>  extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
> -extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
> +extern __typeof (__redirect_strstr) __strstr_generic attribute_hidden;
>  extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden;
>
>  #include "init-arch.h"
> @@ -58,7 +58,7 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
>      return __strstr_sse2_unaligned;
>
> -  return __strstr_sse2;
> +  return __strstr_generic;
>  }
>
>  libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ());
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Align entry for memrchr to 64-bytes.
  2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
                   ` (3 preceding siblings ...)
  2022-06-24 16:42 ` [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
@ 2022-06-24 17:15 ` H.J. Lu
  2022-07-14  2:59   ` Sunil Pandey
  4 siblings, 1 reply; 12+ messages in thread
From: H.J. Lu @ 2022-06-24 17:15 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The function was tuned around 64-byte entry alignment and performs
> better for all sizes with it.
>
> As well different code boths where explicitly written to touch the
> minimum number of cache line i.e sizes <= 32 touch only the entry
> cache line.
> ---
>  sysdeps/x86_64/multiarch/memrchr-avx2.S | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
> index 9c83c76d3c..f300d7daf4 100644
> --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
> +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
> @@ -35,7 +35,7 @@
>  # define VEC_SIZE                      32
>  # define PAGE_SIZE                     4096
>         .section SECTION(.text), "ax", @progbits
> -ENTRY(MEMRCHR)
> +ENTRY_P2ALIGN(MEMRCHR, 6)
>  # ifdef __ILP32__
>         /* Clear upper bits.  */
>         and     %RDX_LP, %RDX_LP
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Align entry for memrchr to 64-bytes.
  2022-06-24 17:15 ` [PATCH v2] x86: Align entry for memrchr to 64-bytes H.J. Lu
@ 2022-07-14  2:59   ` Sunil Pandey
  0 siblings, 0 replies; 12+ messages in thread
From: Sunil Pandey @ 2022-07-14  2:59 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Noah Goldstein, GNU C Library

On Fri, Jun 24, 2022 at 10:16 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The function was tuned around 64-byte entry alignment and performs
> > better for all sizes with it.
> >
> > As well different code boths where explicitly written to touch the
> > minimum number of cache line i.e sizes <= 32 touch only the entry
> > cache line.
> > ---
> >  sysdeps/x86_64/multiarch/memrchr-avx2.S | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
> > index 9c83c76d3c..f300d7daf4 100644
> > --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
> > +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
> > @@ -35,7 +35,7 @@
> >  # define VEC_SIZE                      32
> >  # define PAGE_SIZE                     4096
> >         .section SECTION(.text), "ax", @progbits
> > -ENTRY(MEMRCHR)
> > +ENTRY_P2ALIGN(MEMRCHR, 6)
> >  # ifdef __ILP32__
> >         /* Clear upper bits.  */
> >         and     %RDX_LP, %RDX_LP
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Thanks.
>
> --
> H.J.

I would like to backport this patch to release branches.
Any comments or objections?

--Sunil

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section
  2022-06-24 17:05   ` H.J. Lu
@ 2022-07-14  3:02     ` Sunil Pandey
  0 siblings, 0 replies; 12+ messages in thread
From: Sunil Pandey @ 2022-07-14  3:02 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Noah Goldstein, GNU C Library

On Fri, Jun 24, 2022 at 10:08 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Fri, Jun 24, 2022 at 9:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Previously was missing but the two implementations shouldn't get in
> > the sse2 (generic) text section.
> > ---
> >  sysdeps/x86_64/multiarch/strlen-vec.S     | 6 +++++-
> >  sysdeps/x86_64/multiarch/wcslen-sse4_1.S  | 1 +
> >  sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 1 +
> >  3 files changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
> > index 42b6124dfd..874123d604 100644
> > --- a/sysdeps/x86_64/multiarch/strlen-vec.S
> > +++ b/sysdeps/x86_64/multiarch/strlen-vec.S
> > @@ -28,6 +28,10 @@
> >  # define SHIFT_RETURN
> >  #endif
> >
> > +#ifndef SECTION
> > +# define SECTION(p)    p
> > +#endif
> > +
> >  /* Long lived register in strlen(s), strnlen(s, n) are:
> >
> >         %xmm3 - zero
> > @@ -37,7 +41,7 @@
> >  */
> >
> >
> > -.text
> > +       .section SECTION(.text),"ax",@progbits
> >  ENTRY(strlen)
> >
> >  /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
> > diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> > index 7e62621afc..e306a77f51 100644
> > --- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> > +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
> > @@ -1,4 +1,5 @@
> >  #define AS_WCSLEN
> >  #define strlen __wcslen_sse4_1
> > +#define SECTION(p)     p##.sse4.1
> >
> >  #include "strlen-vec.S"
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> > index 5fa51fe07c..d2f7dd6e22 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
> > @@ -1,5 +1,6 @@
> >  #define AS_WCSLEN
> >  #define AS_STRNLEN
> >  #define strlen __wcsnlen_sse4_1
> > +#define SECTION(p)     p##.sse4.1
> >
> >  #include "strlen-vec.S"
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Thanks.
>
> --
> H.J.

I would like to backport this patch to release branches.
Any comments or objections?

--Sunil

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-07-14  3:02 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-24 16:42 [PATCH v2] x86: Align entry for memrchr to 64-bytes Noah Goldstein
2022-06-24 16:42 ` [PATCH v2] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
2022-06-24 17:06   ` H.J. Lu
2022-06-24 16:42 ` [PATCH v2] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
2022-06-24 17:03   ` H.J. Lu
2022-06-24 16:42 ` [PATCH v2] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
2022-06-24 17:05   ` H.J. Lu
2022-07-14  3:02     ` Sunil Pandey
2022-06-24 16:42 ` [PATCH v2] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
2022-06-24 17:02   ` H.J. Lu
2022-06-24 17:15 ` [PATCH v2] x86: Align entry for memrchr to 64-bytes H.J. Lu
2022-07-14  2:59   ` Sunil Pandey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).