* [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
@ 2016-04-29 8:51 Rajalakshmi Srinivasaraghavan
2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
0 siblings, 1 reply; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-04-29 8:51 UTC (permalink / raw)
To: libc-alpha; +Cc: Rajalakshmi Srinivasaraghavan
This implementation utilizes vector instructions to improve performance
compared to the current byte-by-byte implementation for POWER7.
The performance improvement is up to 4x. This patch was tested
on powerpc64 and powerpc64le.
2016-04-29 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
* sysdeps/powerpc/powerpc64/multiarch/Makefile:
(sysdep_routines): Add P8 and PPC64 strcasecmp/strncasecmp targets.
* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c:
(__libc_ifunc_impl_list): Add entries for P8 and PPC64
ifunc'ed strcasecmp/strncasecmp.
* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S:
[ENTRY]: Removed.
[END]: Likewise.
[__strcasecmp]: Define instead of the above to control symbol name.
* sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c: Add IFUNC selector
for __strcasecmp_power8.
* sysdeps/powerpc/powerpc64/multiarch/strncase.c: Add IFUNC selector
for __strncasecmp_power8.
* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S: New File.
* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c: Likewise.
* sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S: Likewise.
* sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c: Likewise.
* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strncase.S: Likewise.
---
sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +-
.../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 +
.../powerpc64/multiarch/strcasecmp-power7.S | 18 +-
.../powerpc64/multiarch/strcasecmp-power8.S | 28 ++
.../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 25 ++
sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 29 +-
.../powerpc/powerpc64/multiarch/strncase-power8.S | 28 ++
.../powerpc/powerpc64/multiarch/strncase-ppc64.c | 25 ++
sysdeps/powerpc/powerpc64/multiarch/strncase.c | 24 +-
sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 +++++++++++++++++++++
sysdeps/powerpc/powerpc64/power8/strncase.S | 20 +
11 files changed, 606 insertions(+), 47 deletions(-)
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S
create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 9ee9bc2..e3ac285 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -21,7 +21,9 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \
strncpy-power8 strstr-power7 strstr-ppc64 \
strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \
- strlen-power8 strcasestr-power8 strcasestr-ppc64
+ strlen-power8 strcasestr-power8 strcasestr-ppc64 \
+ strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \
+ strncase-power8
CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index a0dc8ad..9f6bd7c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */
IFUNC_IMPL (i, name, strcasecmp,
IFUNC_IMPL_ADD (array, i, strcasecmp,
+ hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __strcasecmp_power8)
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
hwcap & PPC_FEATURE_HAS_VSX,
__strcasecmp_power7)
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc))
@@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */
IFUNC_IMPL (i, name, strncasecmp,
IFUNC_IMPL_ADD (array, i, strncasecmp,
+ hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __strncasecmp_power8)
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
hwcap & PPC_FEATURE_HAS_VSX,
__strncasecmp_power7)
IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc))
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
index 013dc62..99cd7bd 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
@@ -1,4 +1,4 @@
-/* Optimized strcasecmp implementation foOWER7.
+/* Optimized strcasecmp implementation for POWER7.
Copyright (C) 2013-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,21 +18,7 @@
#include <sysdep.h>
-#undef ENTRY
-#define ENTRY(name) \
- .section ".text"; \
- ENTRY_2(__strcasecmp_power7) \
- .align ALIGNARG(2); \
- BODY_LABEL(__strcasecmp_power7): \
- cfi_startproc; \
- LOCALENTRY(__strcasecmp_power7)
-
-#undef END
-#define END(name) \
- cfi_endproc; \
- TRACEBACK(__strcasecmp_power7) \
- END_2(__strcasecmp_power7)
-
+#define __strcasecmp __strcasecmp_power7
#undef weak_alias
#define weak_alias(name, alias)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
new file mode 100644
index 0000000..492047a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
@@ -0,0 +1,28 @@
+/* Optimized strcasecmp implementation for POWER8.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define __strcasecmp __strcasecmp_power8
+#undef weak_alias
+#define weak_alias(name, alias)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
new file mode 100644
index 0000000..90f75ce
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
@@ -0,0 +1,25 @@
+/* Multiarch strcasecmp for PPC64.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
+
+#define strcasecmp __strcasecmp_ppc
+
+#include <string/strcasecmp.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
index 1f22336..2b554dc 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
@@ -1,4 +1,4 @@
-/* Multiple versions of strcasecmp.
+/* Multiple versions of strcasecmp
Copyright (C) 2013-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,25 +16,22 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# include <string.h>
-# define strcasecmp __strcasecmp_ppc
-extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
-extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
-#endif
+#include <string.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
-#include <string/strcasecmp.c>
#undef strcasecmp
+extern __typeof (__strcasecmp) __libc_strcasecmp;
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
+extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
+extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden;
-extern __typeof (__strcasecmp) __libc_strcasecmp;
libc_ifunc (__libc_strcasecmp,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __strcasecmp_power7
- : __strcasecmp_ppc);
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __strcasecmp_power8:
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strcasecmp_power7
+ : __strcasecmp_ppc);
weak_alias (__libc_strcasecmp, strcasecmp)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
new file mode 100644
index 0000000..01a63b5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
@@ -0,0 +1,28 @@
+/* Optimized strncasecmp implementation for POWER8.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define __strncasecmp __strncasecmp_power8
+#undef weak_alias
+#define weak_alias(name, alias)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strncase.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
new file mode 100644
index 0000000..3123965
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
@@ -0,0 +1,25 @@
+/* Multiarch strncasecmp for PPC64.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
+
+#define strncasecmp __strncasecmp_ppc
+
+#include <string/strncase.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
index 2729fce..7b8e7d3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
@@ -16,26 +16,22 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# include <string.h>
-# define strncasecmp __strncasecmp_ppc
-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
-#endif
+#include <string.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
-#include <string/strncase.c>
#undef strncasecmp
+extern __typeof (__strncasecmp) __libc_strncasecmp;
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
+extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
+extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden;
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
- ifunc symbol properly. */
-extern __typeof (__strncasecmp) __libc_strncasecmp;
libc_ifunc (__libc_strncasecmp,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __strncasecmp_power8:
(hwcap & PPC_FEATURE_HAS_VSX)
? __strncasecmp_power7
: __strncasecmp_ppc);
+
weak_alias (__libc_strncasecmp, strncasecmp)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
new file mode 100644
index 0000000..63f6217
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
@@ -0,0 +1,446 @@
+/* Optimized strcasecmp implementation for POWER8.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <locale-defines.h>
+
+/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
+
+#ifndef USE_AS_STRNCASECMP
+# define __STRCASECMP __strcasecmp
+# define STRCASECMP strcasecmp
+#else
+# define __STRCASECMP __strncasecmp
+# define STRCASECMP strncasecmp
+#endif
+/* Convert the 16 bytes in each of v4 and v5 to lowercase and compare them. */
+#define TOLOWER() \
+ vaddubm v8, v4, v1; \
+ vaddubm v7, v4, v3; \
+ vcmpgtub v8, v8, v2; \
+ vsel v4, v7, v4, v8; \
+ vaddubm v8, v5, v1; \
+ vaddubm v7, v5, v3; \
+ vcmpgtub v8, v8, v2; \
+ vsel v5, v7, v5, v8; \
+ vcmpequb. v7, v5, v4;
+
+/* Get 16 bytes for unaligned case. */
+#ifdef __LITTLE_ENDIAN__
+#define GET16BYTES(reg1, reg2, reg3) \
+ lvx reg1, 0, reg2; \
+ vcmpequb. v8, v0, reg1; \
+ beq cr6, 1f; \
+ vspltisb v9, 0; \
+ b 2f; \
+ .align 4; \
+1: \
+ addi r6, reg2, 16; \
+ lvx v9, 0, r6; \
+2: \
+ vperm reg1, v9, reg1, reg3;
+#else
+#define GET16BYTES(reg1, reg2, reg3) \
+ lvx reg1, 0, reg2; \
+ vcmpequb. v8, v0, reg1; \
+ beq cr6, 1f; \
+ vspltisb v9, 0; \
+ b 2f; \
+ .align 4; \
+1: \
+ addi r6, reg2, 16; \
+ lvx v9, 0, r6; \
+2: \
+ vperm reg1, reg1, v9, reg3;
+#endif
+
+/* Check null in v4, v5 and convert to lower. */
+#define CHECKNULLANDCONVERT() \
+ vcmpequb. v7, v0, v5; \
+ beq cr6, 3f; \
+ vcmpequb. v7, v0, v4; \
+ beq cr6, 3f; \
+ b L(null_found); \
+ .align 4; \
+3: \
+ TOLOWER()
+
+#ifdef _ARCH_PWR8
+# define VCLZD_V8_v7 vclzd v8, v7;
+# define MFVRD_R3_V1 mfvrd r3, v1;
+# define VSUBUDM_V9_V8 vsubudm v9, v9, v8;
+# define VPOPCNTD_V8_V8 vpopcntd v8, v8;
+# define VADDUQM_V7_V8 vadduqm v9, v7, v8;
+#else
+# define VCLZD_V8_v7 .long 0x11003fc2
+# define MFVRD_R3_V1 .long 0x7c230067
+# define VSUBUDM_V9_V8 .long 0x112944c0
+# define VPOPCNTD_V8_V8 .long 0x110047c3
+# define VADDUQM_V7_V8 .long 0x11274100
+#endif
+
+ .machine power7
+
+ENTRY (__STRCASECMP)
+#ifdef USE_AS_STRNCASECMP
+ CALL_MCOUNT 3
+#else
+ CALL_MCOUNT 2
+#endif
+#define rRTN r3 /* Return value */
+#define rSTR1 r10 /* 1st string */
+#define rSTR2 r4 /* 2nd string */
+#define rCHAR1 r6 /* Byte read from 1st string */
+#define rCHAR2 r7 /* Byte read from 2nd string */
+#define rADDR1 r8 /* Address of tolower(rCHAR1) */
+#define rADDR2 r12 /* Address of tolower(rCHAR2) */
+#define rLWR1 r8 /* Word tolower(rCHAR1) */
+#define rLWR2 r12 /* Word tolower(rCHAR2) */
+#define rTMP r9
+#define rLOC r11 /* Default locale address */
+
+ cmpd cr7, rRTN, rSTR2
+
+ /* Get locale address. */
+ ld rTMP, __libc_tsd_LOCALE@got@tprel(r2)
+ add rLOC, rTMP, __libc_tsd_LOCALE@tls
+ ld rLOC, 0(rLOC)
+
+ mr rSTR1, rRTN
+ li rRTN, 0
+ beqlr cr7
+#ifdef USE_AS_STRNCASECMP
+ cmpdi cr7, r5, 0
+ beq cr7, L(retnull)
+ cmpdi cr7, r5, 16
+ blt cr7, L(bytebybyte)
+#endif
+ vspltisb v0, 0
+ vspltisb v8, -1
+ /* Check for null in initial characters.
+ Check max of 16 char depending on the alignment.
+ If null is present, proceed byte by byte. */
+ lvx v4, 0, rSTR1
+#ifdef __LITTLE_ENDIAN__
+ lvsr v10, 0, rSTR1 /* Compute mask. */
+ vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */
+#else
+ lvsl v10, 0, rSTR1
+ vperm v9, v4, v8, v10
+#endif
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */
+ bne cr6, L(bytebybyte)
+ lvx v5, 0, rSTR2
+ /* Calculate alignment. */
+#ifdef __LITTLE_ENDIAN__
+ lvsr v6, 0, rSTR2
+ vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */
+#else
+ lvsl v6, 0, rSTR2
+ vperm v9, v5, v8, v6
+#endif
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */
+ bne cr6, L(bytebybyte)
+ /* Check if locale has non ascii characters. */
+ ld rTMP, 0(rLOC)
+ addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
+ lwz rTMP, 0(r6)
+ cmpdi cr7, rTMP, 1
+ beq cr7, L(bytebybyte)
+
+ /* Load vector registers with values used for TOLOWER. */
+ /* Load v1 = 0xbf, v2 = 0x19, v3 = 0x20 in each byte. */
+ vspltisb v3, 2
+ vspltisb v9, 4
+ vsl v3, v3, v9
+ vaddubm v1, v3, v3
+ vnor v1, v1, v1
+ vspltisb v2, 7
+ vsububm v2, v3, v2
+
+ andi. rADDR1, rSTR1, 0xF
+ beq cr0, L(align)
+ addi r6, rSTR1, 16
+ lvx v9, 0, r6
+ /* Compute 16 bytes from previous two loads. */
+#ifdef __LITTLE_ENDIAN__
+ vperm v4, v9, v4, v10
+#else
+ vperm v4, v4, v9, v10
+#endif
+L(align):
+ andi. rADDR2, rSTR2, 0xF
+ beq cr0, L(align1)
+ addi r6, rSTR2, 16
+ lvx v9, 0, r6
+ /* Compute 16 bytes from previous two loads. */
+#ifdef __LITTLE_ENDIAN__
+ vperm v5, v9, v5, v6
+#else
+ vperm v5, v5, v9, v6
+#endif
+L(align1):
+ CHECKNULLANDCONVERT()
+ blt cr6, L(match)
+ b L(different)
+ .align 4
+L(match):
+ clrldi r6, rSTR1, 60
+ subfic r7, r6, 16
+#ifdef USE_AS_STRNCASECMP
+ sub r5, r5, r7
+#endif
+ add rSTR1, rSTR1, r7
+ add rSTR2, rSTR2, r7
+ andi. rADDR2, rSTR2, 0xF
+ addi rSTR1, rSTR1, -16
+ addi rSTR2, rSTR2, -16
+ beq cr0, L(aligned)
+#ifdef __LITTLE_ENDIAN__
+ lvsr v6, 0, rSTR2
+#else
+ lvsl v6, 0, rSTR2
+#endif
+ /* There are 2 loops depending on the input alignment.
+ Each loop gets 16 bytes from s1 and s2, checks for null,
+ converts to lowercase and compares. Loop until a difference
+ or a null occurs. */
+L(s1_align):
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#ifdef USE_AS_STRNCASECMP
+ cmpdi cr7, r5, 16
+ blt cr7, L(bytebybyte)
+ addi r5, r5, -16
+#endif
+ lvx v4, 0, rSTR1
+ GET16BYTES(v5, rSTR2, v6)
+ CHECKNULLANDCONVERT()
+ blt cr6, L(s1_align)
+ b L(different)
+ .align 4
+L(aligned):
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#ifdef USE_AS_STRNCASECMP
+ cmpdi cr7, r5, 16
+ blt cr7, L(bytebybyte)
+ addi r5, r5, -16
+#endif
+ lvx v4, 0, rSTR1
+ lvx v5, 0, rSTR2
+ CHECKNULLANDCONVERT()
+ blt cr6, L(aligned)
+
+ /* Calculate and return the difference. */
+L(different):
+ vaddubm v1, v3, v3
+ vcmpequb v7, v0, v7
+#ifdef __LITTLE_ENDIAN__
+ /* Count trailing zero. */
+ vspltisb v8, -1
+ VADDUQM_V7_V8
+ vandc v8, v9, v7
+ VPOPCNTD_V8_V8
+ vspltb v6, v8, 15
+ vcmpequb. v6, v6, v1
+ blt cr6, L(shift8)
+#else
+ /* Count leading zero. */
+ VCLZD_V8_v7
+ vspltb v6, v8, 7
+ vcmpequb. v6, v6, v1
+ blt cr6, L(shift8)
+ vsro v8, v8, v1
+#endif
+ b L(skipsum)
+ .align 4
+L(shift8):
+ vsumsws v8, v8, v0
+L(skipsum):
+#ifdef __LITTLE_ENDIAN__
+ /* Shift registers based on trailing zero count. */
+ vsro v6, v5, v8
+ vsro v7, v4, v8
+ /* Merge and move to GPR. */
+ vmrglb v6, v6, v7
+ vslo v1, v6, v1
+ MFVRD_R3_V1
+ /* Place the characters that are different in first position. */
+ sldi rSTR2, rRTN, 56
+ srdi rSTR2, rSTR2, 56
+ sldi rSTR1, rRTN, 48
+ srdi rSTR1, rSTR1, 56
+#else
+ vslo v6, v5, v8
+ vslo v7, v4, v8
+ vmrghb v1, v6, v7
+ MFVRD_R3_V1
+ srdi rSTR2, rRTN, 48
+ sldi rSTR2, rSTR2, 56
+ srdi rSTR2, rSTR2, 56
+ srdi rSTR1, rRTN, 56
+#endif
+ subf rRTN, rSTR1, rSTR2
+ extsw rRTN, rRTN
+ blr
+
+ .align 4
+ /* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of junk beyond
+ the end of the strings... */
+L(null_found):
+ vaddubm v10, v3, v3
+#ifdef __LITTLE_ENDIAN__
+ /* Count trailing zero. */
+ vspltisb v8, -1
+ VADDUQM_V7_V8
+ vandc v8, v9, v7
+ VPOPCNTD_V8_V8
+ vspltb v6, v8, 15
+ vcmpequb. v6, v6, v10
+ blt cr6, L(shift_8)
+#else
+ /* Count leading zero. */
+ VCLZD_V8_v7
+ vspltb v6, v8, 7
+ vcmpequb. v6, v6, v10
+ blt cr6, L(shift_8)
+ vsro v8, v8, v10
+#endif
+ b L(skipsum1)
+ .align 4
+L(shift_8):
+ vsumsws v8, v8, v0
+L(skipsum1):
+ /* Calculate shift count based on count of zero. */
+ vspltisb v10, 7
+ vslb v10, v10, v10
+ vsldoi v9, v0, v10, 1
+ VSUBUDM_V9_V8
+ vspltisb v8, 8
+ vsldoi v8, v0, v8, 1
+ VSUBUDM_V9_V8
+ /* Shift and remove junk after null character. */
+#ifdef __LITTLE_ENDIAN__
+ vslo v5, v5, v9
+ vslo v4, v4, v9
+#else
+ vsro v5, v5, v9
+ vsro v4, v4, v9
+#endif
+ /* Convert and compare 16 bytes. */
+ TOLOWER()
+ blt cr6, L(retnull)
+ b L(different)
+ .align 4
+L(retnull):
+ li rRTN, 0
+ blr
+ .align 4
+L(bytebybyte):
+ /* Unrolling loop for POWER: loads are done with 'lbz' plus
+ offset and string descriptors are only updated at the end
+ of the unrolled loop. */
+ ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
+#ifdef USE_AS_STRNCASECMP
+ rldicl rTMP, r5, 62, 2
+ cmpdi cr7, rTMP, 0
+ beq cr7, L(lessthan4)
+ mtctr rTMP
+#endif
+L(loop):
+ cmpdi rCHAR1, 0 /* *s1 == '\0' ? */
+ sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */
+ sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */
+ lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */
+ lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */
+ cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */
+ crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 == '\0') || (r == 0) */
+ beq cr1, L(done)
+ lbz rCHAR1, 1(rSTR1)
+ lbz rCHAR2, 1(rSTR2)
+ cmpdi rCHAR1, 0
+ sldi rADDR1, rCHAR1, 2
+ sldi rADDR2, rCHAR2, 2
+ lwzx rLWR1, rLOC, rADDR1
+ lwzx rLWR2, rLOC, rADDR2
+ cmpw cr1, rLWR1, rLWR2
+ crorc 4*cr1+eq,eq,4*cr1+eq
+ beq cr1, L(done)
+ lbz rCHAR1, 2(rSTR1)
+ lbz rCHAR2, 2(rSTR2)
+ cmpdi rCHAR1, 0
+ sldi rADDR1, rCHAR1, 2
+ sldi rADDR2, rCHAR2, 2
+ lwzx rLWR1, rLOC, rADDR1
+ lwzx rLWR2, rLOC, rADDR2
+ cmpw cr1, rLWR1, rLWR2
+ crorc 4*cr1+eq,eq,4*cr1+eq
+ beq cr1, L(done)
+ lbz rCHAR1, 3(rSTR1)
+ lbz rCHAR2, 3(rSTR2)
+ cmpdi rCHAR1, 0
+ /* Increment both string descriptors */
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ sldi rADDR1, rCHAR1, 2
+ sldi rADDR2, rCHAR2, 2
+ lwzx rLWR1, rLOC, rADDR1
+ lwzx rLWR2, rLOC, rADDR2
+ cmpw cr1, rLWR1, rLWR2
+ crorc 4*cr1+eq,eq,4*cr1+eq
+ beq cr1, L(done)
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
+#ifdef USE_AS_STRNCASECMP
+ bdnz L(loop)
+#else
+ b L(loop)
+#endif
+#ifdef USE_AS_STRNCASECMP
+L(lessthan4):
+ clrldi r5, r5, 62
+ cmpdi cr7, r5, 0
+ beq cr7, L(retnull)
+ mtctr r5
+L(loop1):
+ cmpdi rCHAR1, 0
+ sldi rADDR1, rCHAR1, 2
+ sldi rADDR2, rCHAR2, 2
+ lwzx rLWR1, rLOC, rADDR1
+ lwzx rLWR2, rLOC, rADDR2
+ cmpw cr1, rLWR1, rLWR2
+ crorc 4*cr1+eq,eq,4*cr1+eq
+ beq cr1, L(done)
+ addi rSTR1, rSTR1, 1
+ addi rSTR2, rSTR2, 1
+ lbz rCHAR1, 0(rSTR1)
+ lbz rCHAR2, 0(rSTR2)
+ bdnz L(loop1)
+#endif
+L(done):
+ subf r0, rLWR2, rLWR1
+ extsw rRTN, r0
+ blr
+END (__STRCASECMP)
+
+weak_alias (__STRCASECMP, STRCASECMP)
+libc_hidden_builtin_def (__STRCASECMP)
diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S
new file mode 100644
index 0000000..7ce2ed0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strncase.S
@@ -0,0 +1,20 @@
+/* Optimized strncasecmp implementation for POWER8.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define USE_AS_STRNCASECMP 1
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
--
1.8.3.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
2016-04-29 8:51 [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8 Rajalakshmi Srinivasaraghavan
@ 2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
2016-06-14 9:46 ` Rajalakshmi Srinivasaraghavan
0 siblings, 1 reply; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-06-13 19:14 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, libc-alpha
Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
> new file mode 100644
> index 0000000..90f75ce
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
> @@ -0,0 +1,25 @@
> +/* Multiarch strcasecmp for PPC64.
> + Copyright (C) 2016 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <string.h>
> +
> +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
Can't you remove this include and this prototype?
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> index 1f22336..2b554dc 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> @@ -1,4 +1,4 @@
> -/* Multiple versions of strcasecmp.
> +/* Multiple versions of strcasecmp
> Copyright (C) 2013-2016 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> @@ -16,25 +16,22 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> -#if IS_IN (libc)
> -# include <string.h>
> -# define strcasecmp __strcasecmp_ppc
> -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
> -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
> -#endif
> +#include <string.h>
> +#include <shlib-compat.h>
> +#include "init-arch.h"
>
> -#include <string/strcasecmp.c>
> #undef strcasecmp
Can't you remove this undef too?
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
> new file mode 100644
> index 0000000..3123965
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
> @@ -0,0 +1,25 @@
> +/* Multiarch strncasecmp for PPC64.
> + Copyright (C) 2016 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <string.h>
> +
> +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
Likewise.
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> index 2729fce..7b8e7d3 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> @@ -16,26 +16,22 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> -#if IS_IN (libc)
> -# include <string.h>
> -# define strncasecmp __strncasecmp_ppc
> -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
> -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
> -#endif
> +#include <string.h>
> +#include <shlib-compat.h>
> +#include "init-arch.h"
>
> -#include <string/strncase.c>
> #undef strncasecmp
Likewise
LGTM after removing those lines.
--
Tulio Magno
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
@ 2016-06-14 9:46 ` Rajalakshmi Srinivasaraghavan
2016-07-04 14:16 ` Florian Weimer
0 siblings, 1 reply; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-06-14 9:46 UTC (permalink / raw)
To: libc-alpha; +Cc: tuliom
On 06/14/2016 12:43 AM, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
>> new file mode 100644
>> index 0000000..90f75ce
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
>> @@ -0,0 +1,25 @@
>> +/* Multiarch strcasecmp for PPC64.
>> + Copyright (C) 2016 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <http://www.gnu.org/licenses/>. */
>> +
>> +#include <string.h>
>> +
>> +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
> Can't you remove this include and this prototype?
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> index 1f22336..2b554dc 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> @@ -1,4 +1,4 @@
>> -/* Multiple versions of strcasecmp.
>> +/* Multiple versions of strcasecmp
>> Copyright (C) 2013-2016 Free Software Foundation, Inc.
>> This file is part of the GNU C Library.
>>
>> @@ -16,25 +16,22 @@
>> License along with the GNU C Library; if not, see
>> <http://www.gnu.org/licenses/>. */
>>
>> -#if IS_IN (libc)
>> -# include <string.h>
>> -# define strcasecmp __strcasecmp_ppc
>> -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
>> -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
>> -#endif
>> +#include <string.h>
>> +#include <shlib-compat.h>
>> +#include "init-arch.h"
>>
>> -#include <string/strcasecmp.c>
>> #undef strcasecmp
> Can't you remove this undef too?
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
>> new file mode 100644
>> index 0000000..3123965
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
>> @@ -0,0 +1,25 @@
>> +/* Multiarch strncasecmp for PPC64.
>> + Copyright (C) 2016 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <http://www.gnu.org/licenses/>. */
>> +
>> +#include <string.h>
>> +
>> +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
> Likewise.
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> index 2729fce..7b8e7d3 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> @@ -16,26 +16,22 @@
>> License along with the GNU C Library; if not, see
>> <http://www.gnu.org/licenses/>. */
>>
>> -#if IS_IN (libc)
>> -# include <string.h>
>> -# define strncasecmp __strncasecmp_ppc
>> -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
>> -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
>> -#endif
>> +#include <string.h>
>> +#include <shlib-compat.h>
>> +#include "init-arch.h"
>>
>> -#include <string/strncase.c>
>> #undef strncasecmp
> Likewise
>
> LGTM after removing those lines.
Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
after removing those lines.
>
>
--
Thanks
Rajalakshmi S
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8
2016-06-14 9:46 ` Rajalakshmi Srinivasaraghavan
@ 2016-07-04 14:16 ` Florian Weimer
2016-07-05 12:09 ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
0 siblings, 1 reply; 11+ messages in thread
From: Florian Weimer @ 2016-07-04 14:16 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, libc-alpha; +Cc: tuliom
On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
> after removing those lines.
This appears to have caused bug 20327.
Could you have a look?
Thanks,
Florian
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-04 14:16 ` Florian Weimer
@ 2016-07-05 12:09 ` Rajalakshmi Srinivasaraghavan
2016-07-05 12:15 ` Florian Weimer
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
0 siblings, 2 replies; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-07-05 12:09 UTC (permalink / raw)
To: Florian Weimer, libc-alpha
[-- Attachment #1: Type: text/plain, Size: 363 bytes --]
On 07/04/2016 07:46 PM, Florian Weimer wrote:
> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>
>> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
>> after removing those lines.
>
> This appears to have caused bug 20327.
>
> Could you have a look?
>
> Thanks,
> Florian
>
>
I have fixed it in the attached patch.
--
Thanks
Rajalakshmi S
[-- Attachment #2: 0001-POWER8-Fix-return-code-of-strcasecmp-for-unaligned-i.patch --]
[-- Type: text/x-patch, Size: 1490 bytes --]
From 42c2aeecdb371b7544b81cb09cfcb12c2f873c72 Mon Sep 17 00:00:00 2001
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date: Tue, 5 Jul 2016 04:41:51 -0400
Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
If the input values are unaligned and if there are null characters in the
memory before the starting address of the input values, strcasecmp
gives an incorrect return code. Fixed it by masking the bits that
are not part of the string.
Tested on ppc64 and ppc64le.
[BZ #20327]
* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
are not part of the string.
---
sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
index 63f6217..d6a4df2 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
@@ -44,7 +44,9 @@
#ifdef __LITTLE_ENDIAN__
#define GET16BYTES(reg1, reg2, reg3) \
lvx reg1, 0, reg2; \
- vcmpequb. v8, v0, reg1; \
+ vspltisb v8, -1; \
+ vperm v8, v8, reg1, reg3; \
+ vcmpequb. v8, v0, v8; \
beq cr6, 1f; \
vspltisb v9, 0; \
b 2f; \
@@ -57,7 +59,9 @@
#else
#define GET16BYTES(reg1, reg2, reg3) \
lvx reg1, 0, reg2; \
- vcmpequb. v8, v0, reg1; \
+ vspltisb v8, -1; \
+ vperm v8, reg1, v8, reg3; \
+ vcmpequb. v8, v0, v8; \
beq cr6, 1f; \
vspltisb v9, 0; \
b 2f; \
--
1.8.3.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 12:09 ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
@ 2016-07-05 12:15 ` Florian Weimer
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
1 sibling, 0 replies; 11+ messages in thread
From: Florian Weimer @ 2016-07-05 12:15 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan; +Cc: libc-alpha
On 07/05/2016 02:09 PM, Rajalakshmi Srinivasaraghavan wrote:
> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>>
>>> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
>>> after removing those lines.
>>
>> This appears to have caused bug 20327.
>>
>> Could you have a look?
>>
>> Thanks,
>> Florian
>>
>>
> I have fixed it in the attached patch.
I can confirm this addresses the issue we saw. Feel free to push if you
feel confident about this change. :)
Thanks,
Florian
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 12:09 ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
2016-07-05 12:15 ` Florian Weimer
@ 2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
` (2 more replies)
1 sibling, 3 replies; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-07-05 14:01 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, Florian Weimer, libc-alpha,
Adhemerval Zanella
Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
Could you replace POWER8 with powerpc, please?
> If the input values are unaligned and if there are null characters in the
> memory before the starting address of the input values, strcasecmp
> gives incorrect return code. Fixed it by adding mask the bits that
> are not part of the string.
>
> Tested on ppc64 and ppc64le.
Despite this being a bug fix, I believe we need the approval from Adhemerval
before integrating it during the freeze window.
> [BZ #20327]
> * sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
> are not part of the string.
This is a very important case. Can we improve the current testcase to
validate this scenario too?
> ---
> sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
> 1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> index 63f6217..d6a4df2 100644
> --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> @@ -44,7 +44,9 @@
> #ifdef __LITTLE_ENDIAN__
> #define GET16BYTES(reg1, reg2, reg3) \
> lvx reg1, 0, reg2; \
> - vcmpequb. v8, v0, reg1; \
> + vspltisb v8, -1; \
> + vperm v8, v8, reg1, reg3; \
> + vcmpequb. v8, v0, v8; \
> beq cr6, 1f; \
> vspltisb v9, 0; \
> b 2f; \
> @@ -57,7 +59,9 @@
> #else
> #define GET16BYTES(reg1, reg2, reg3) \
> lvx reg1, 0, reg2; \
> - vcmpequb. v8, v0, reg1; \
> + vspltisb v8, -1; \
> + vperm v8, reg1, v8, reg3; \
> + vcmpequb. v8, v0, v8; \
> beq cr6, 1f; \
> vspltisb v9, 0; \
> b 2f; \
Although this code is simple, I believe this macro needs more comments.
I suggest explaining the following:
- How does this macro use reg1, reg2, reg3 and v8?
- Why is it setting v9 to 0?
--
Tulio Magno
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
@ 2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
2016-07-05 15:34 ` Florian Weimer
2016-07-05 14:33 ` Adhemerval Zanella
2016-07-05 16:10 ` Rajalakshmi Srinivasaraghavan
2 siblings, 1 reply; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-07-05 14:05 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, Florian Weimer, libc-alpha,
Adhemerval Zanella
Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> writes:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> [BZ #20327]
>> * sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>> are not part of the string.
>
> This is a very important case. Can we improve the current testcase to
> validate this scenario too?
Ooops. I just read in the bug report that Florian is already working on this,
so feel free to ignore this.
--
Tulio Magno
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
@ 2016-07-05 14:33 ` Adhemerval Zanella
2016-07-05 16:10 ` Rajalakshmi Srinivasaraghavan
2 siblings, 0 replies; 11+ messages in thread
From: Adhemerval Zanella @ 2016-07-05 14:33 UTC (permalink / raw)
To: Tulio Magno Quites Machado Filho, Rajalakshmi Srinivasaraghavan,
Florian Weimer, libc-alpha
On 05/07/2016 11:01, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
>
> Could you replace POWER8 by powerpc, please?
>
>> If the input values are unaligned and if there are null characters in the
>> memory before the starting address of the input values, strcasecmp
>> gives incorrect return code. Fixed it by adding mask the bits that
>> are not part of the string.
>>
>> Tested on ppc64 and ppc64le.
>
> Despite this being a bug fix, I believe we need the approval from Adhemerval
> before integrating it during the freeze window.
>
Bug fixes are OK for the current phase, and I was sorting out the release blockers
yesterday. I will send a message about it in a couple of hours.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
@ 2016-07-05 15:34 ` Florian Weimer
0 siblings, 0 replies; 11+ messages in thread
From: Florian Weimer @ 2016-07-05 15:34 UTC (permalink / raw)
To: Tulio Magno Quites Machado Filho, Rajalakshmi Srinivasaraghavan,
libc-alpha, Adhemerval Zanella
On 07/05/2016 04:05 PM, Tulio Magno Quites Machado Filho wrote:
> Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> writes:
>
>> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>>
>>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>>> [BZ #20327]
>>> * sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>>> are not part of the string.
>>
>> This is a very important case. Can we improve the current testcase to
>> validate this scenario too?
>
> Ooops. I just read in the bug report that Florian is already working on this,
> so feel free to ignore this.
Yeah, the proposed patch is here:
<https://sourceware.org/ml/libc-alpha/2016-07/msg00126.html>
Thanks,
Florian
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
2016-07-05 14:33 ` Adhemerval Zanella
@ 2016-07-05 16:10 ` Rajalakshmi Srinivasaraghavan
2 siblings, 0 replies; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-07-05 16:10 UTC (permalink / raw)
To: libc-alpha
On 07/05/2016 07:31 PM, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
> Could you replace POWER8 by powerpc, please?
>
>> If the input values are unaligned and if there are null characters in the
>> memory before the starting address of the input values, strcasecmp
>> gives incorrect return code. Fixed it by adding mask the bits that
>> are not part of the string.
>>
>> Tested on ppc64 and ppc64le.
> Despite this being a bug fix, I believe we need the approval from Adhemerval
> before integrating it during the freeze window.
>
>> [BZ #20327]
>> * sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>> are not part of the string.
> This is a very important case. Can we improve the current testcase to
> validate this scenario too?
>
>> ---
>> sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
>> 1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> index 63f6217..d6a4df2 100644
>> --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> @@ -44,7 +44,9 @@
>> #ifdef __LITTLE_ENDIAN__
>> #define GET16BYTES(reg1, reg2, reg3) \
>> lvx reg1, 0, reg2; \
>> - vcmpequb. v8, v0, reg1; \
>> + vspltisb v8, -1; \
>> + vperm v8, v8, reg1, reg3; \
>> + vcmpequb. v8, v0, v8; \
>> beq cr6, 1f; \
>> vspltisb v9, 0; \
>> b 2f; \
>> @@ -57,7 +59,9 @@
>> #else
>> #define GET16BYTES(reg1, reg2, reg3) \
>> lvx reg1, 0, reg2; \
>> - vcmpequb. v8, v0, reg1; \
>> + vspltisb v8, -1; \
>> + vperm v8, reg1, v8, reg3; \
>> + vcmpequb. v8, v0, v8; \
>> beq cr6, 1f; \
>> vspltisb v9, 0; \
>> b 2f; \
> Although this code is simple, I believe this macro is missing more comments.
>
> I suggest to explain the following:
> - How does this macro use reg1, reg2, reg3 and v8?
> - Why is it setting v9 to 0?
>
Added comments and committed it as
30e4cc5413f72c2c728a544389da0c48500d9904
--
Thanks
Rajalakshmi S
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2016-07-05 16:10 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-29 8:51 [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 Rajalakshmi Srinivasaraghavan
2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
2016-06-14 9:46 ` Rajalakshmi Srinivasaraghavan
2016-07-04 14:16 ` Florian Weimer
2016-07-05 12:09 ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
2016-07-05 12:15 ` Florian Weimer
2016-07-05 14:01 ` Tulio Magno Quites Machado Filho
2016-07-05 14:05 ` Tulio Magno Quites Machado Filho
2016-07-05 15:34 ` Florian Weimer
2016-07-05 14:33 ` Adhemerval Zanella
2016-07-05 16:10 ` Rajalakshmi Srinivasaraghavan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).