public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
@ 2016-04-29  8:51 Rajalakshmi Srinivasaraghavan
  2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
  0 siblings, 1 reply; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-04-29  8:51 UTC (permalink / raw)
  To: libc-alpha; +Cc: Rajalakshmi Srinivasaraghavan

This implementation uses vector instructions to improve performance
compared to the current byte-by-byte implementation for POWER7.
The performance improvement is up to 4x.  This patch was tested
on powerpc64 and powerpc64le.

2016-04-29  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/multiarch/Makefile:
	(sysdep_routines): Add P8 and PPC64 strcasecmp/strncasecmp targets.
	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c:
	(__libc_ifunc_impl_list): Add entries for P8 and PPC64
	ifunc'ed strcasecmp/strncasecmp.
	* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S:
	[ENTRY]: Removed.
	[END]: Likewise.
	[__strcasecmp]: Define instead of the above to control symbol name.
	* sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c: Add IFUNC selector
	for __strcasecmp_power8.
	* sysdeps/powerpc/powerpc64/multiarch/strncase.c: Add IFUNC selector
	for __strncasecmp_power8.
	* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S: New File.
	* sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Likewise.
	* sysdeps/powerpc/powerpc64/power8/strncase.S: Likewise.
---
 sysdeps/powerpc/powerpc64/multiarch/Makefile       |   4 +-
 .../powerpc/powerpc64/multiarch/ifunc-impl-list.c  |   6 +
 .../powerpc64/multiarch/strcasecmp-power7.S        |  18 +-
 .../powerpc64/multiarch/strcasecmp-power8.S        |  28 ++
 .../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |  25 ++
 sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c   |  29 +-
 .../powerpc/powerpc64/multiarch/strncase-power8.S  |  28 ++
 .../powerpc/powerpc64/multiarch/strncase-ppc64.c   |  25 ++
 sysdeps/powerpc/powerpc64/multiarch/strncase.c     |  24 +-
 sysdeps/powerpc/powerpc64/power8/strcasecmp.S      | 446 +++++++++++++++++++++
 sysdeps/powerpc/powerpc64/power8/strncase.S        |  20 +
 11 files changed, 606 insertions(+), 47 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
 create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S
 create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S

diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 9ee9bc2..e3ac285 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -21,7 +21,9 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \
 		   strncpy-power8 strstr-power7 strstr-ppc64 \
 		   strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \
-		   strlen-power8 strcasestr-power8 strcasestr-ppc64
+		   strlen-power8 strcasestr-power8 strcasestr-ppc64 \
+		   strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \
+		   strncase-power8
 
 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
 CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index a0dc8ad..9f6bd7c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c.  */
   IFUNC_IMPL (i, name, strcasecmp,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strcasecmp_power8)
+	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __strcasecmp_power7)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc))
@@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c.  */
   IFUNC_IMPL (i, name, strncasecmp,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strncasecmp_power8)
+	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __strncasecmp_power7)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc))
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
index 013dc62..99cd7bd 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
@@ -1,4 +1,4 @@
-/* Optimized strcasecmp implementation foOWER7.
+/* Optimized strcasecmp implementation for POWER7.
    Copyright (C) 2013-2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -18,21 +18,7 @@
 
 #include <sysdep.h>
 
-#undef ENTRY
-#define ENTRY(name)						\
-  .section ".text";						\
-  ENTRY_2(__strcasecmp_power7)					\
-  .align ALIGNARG(2);						\
-  BODY_LABEL(__strcasecmp_power7):				\
-  cfi_startproc;						\
-  LOCALENTRY(__strcasecmp_power7)
-
-#undef END
-#define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__strcasecmp_power7)				\
-  END_2(__strcasecmp_power7)
-
+#define __strcasecmp __strcasecmp_power7
 #undef weak_alias
 #define weak_alias(name, alias)
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
new file mode 100644
index 0000000..492047a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
@@ -0,0 +1,28 @@
+/* Optimized strcasecmp implementation for POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define __strcasecmp __strcasecmp_power8
+#undef weak_alias
+#define weak_alias(name, alias)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
new file mode 100644
index 0000000..90f75ce
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
@@ -0,0 +1,25 @@
+/* Multiarch strcasecmp for PPC64.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
+
+#define strcasecmp __strcasecmp_ppc
+
+#include <string/strcasecmp.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
index 1f22336..2b554dc 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
@@ -1,4 +1,4 @@
-/* Multiple versions of strcasecmp.
+/* Multiple versions of strcasecmp
    Copyright (C) 2013-2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,25 +16,22 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-# include <string.h>
-# define strcasecmp __strcasecmp_ppc
-extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
-extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
-#endif
+#include <string.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
 
-#include <string/strcasecmp.c>
 #undef strcasecmp
+extern __typeof (__strcasecmp) __libc_strcasecmp;
 
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
+extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
+extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden;
 
-extern __typeof (__strcasecmp) __libc_strcasecmp;
 libc_ifunc (__libc_strcasecmp,
-	    (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strcasecmp_power7
-            : __strcasecmp_ppc);
+	     (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+             ? __strcasecmp_power8:
+	     (hwcap & PPC_FEATURE_HAS_VSX)
+             ? __strcasecmp_power7
+             : __strcasecmp_ppc);
 
 weak_alias (__libc_strcasecmp, strcasecmp)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
new file mode 100644
index 0000000..01a63b5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
@@ -0,0 +1,28 @@
+/* Optimized strncasecmp implementation for POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define __strncasecmp __strncasecmp_power8
+#undef weak_alias
+#define weak_alias(name, alias)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strncase.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
new file mode 100644
index 0000000..3123965
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
@@ -0,0 +1,25 @@
+/* Multiarch strncasecmp for PPC64.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
+
+#define strncasecmp __strncasecmp_ppc
+
+#include <string/strncase.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
index 2729fce..7b8e7d3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
@@ -16,26 +16,22 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-# include <string.h>
-# define strncasecmp __strncasecmp_ppc
-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
-#endif
+#include <string.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
 
-#include <string/strncase.c>
 #undef strncasecmp
+extern __typeof (__strncasecmp) __libc_strncasecmp;
 
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
+extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
+extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden;
 
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-extern __typeof (__strncasecmp) __libc_strncasecmp;
 libc_ifunc (__libc_strncasecmp,
+	     (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+             ? __strncasecmp_power8:
 	     (hwcap & PPC_FEATURE_HAS_VSX)
              ? __strncasecmp_power7
              : __strncasecmp_ppc);
+
 weak_alias (__libc_strncasecmp, strncasecmp)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
new file mode 100644
index 0000000..63f6217
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
@@ -0,0 +1,446 @@
+/* Optimized strcasecmp implementation for PowerPC64.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <locale-defines.h>
+
+/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
+
+#ifndef USE_AS_STRNCASECMP
+#  define __STRCASECMP __strcasecmp
+#  define STRCASECMP   strcasecmp
+#else
+#  define __STRCASECMP __strncasecmp
+#  define STRCASECMP   strncasecmp
+#endif
+/* Convert 16 bytes to lowercase and compare */
+#define TOLOWER()     \
+	vaddubm	v8, v4, v1; \
+	vaddubm	v7, v4, v3; \
+	vcmpgtub	v8, v8, v2; \
+	vsel	v4, v7, v4, v8; \
+	vaddubm	v8, v5, v1; \
+	vaddubm	v7, v5, v3; \
+	vcmpgtub	v8, v8, v2; \
+	vsel	v5, v7, v5, v8; \
+	vcmpequb.	v7, v5, v4;
+
+/* Get 16 bytes for unaligned case.  */
+#ifdef __LITTLE_ENDIAN__
+#define GET16BYTES(reg1, reg2, reg3) \
+	lvx	reg1, 0, reg2; \
+	vcmpequb.	v8, v0, reg1; \
+	beq	cr6, 1f; \
+	vspltisb	v9, 0; \
+	b	2f; \
+	.align 4; \
+1: \
+	addi	r6, reg2, 16; \
+	lvx	v9, 0, r6; \
+2: \
+	vperm	reg1, v9, reg1, reg3;
+#else
+#define GET16BYTES(reg1, reg2, reg3) \
+	lvx	reg1, 0, reg2; \
+	vcmpequb.	v8, v0, reg1; \
+	beq	cr6, 1f; \
+	vspltisb	v9, 0; \
+	b	2f; \
+	.align 4; \
+1: \
+	addi	r6, reg2, 16; \
+	lvx	v9, 0, r6; \
+2: \
+	vperm	reg1, reg1, v9, reg3;
+#endif
+
+/* Check null in v4, v5 and convert to lower.  */
+#define CHECKNULLANDCONVERT() \
+	vcmpequb.	v7, v0, v5; \
+	beq	cr6, 3f; \
+	vcmpequb.	v7, v0, v4; \
+	beq	cr6, 3f; \
+	b	L(null_found); \
+	.align  4; \
+3: \
+	TOLOWER()
+
+#ifdef _ARCH_PWR8
+#  define VCLZD_V8_v7	vclzd	v8, v7;
+#  define MFVRD_R3_V1	mfvrd	r3, v1;
+#  define VSUBUDM_V9_V8	vsubudm	v9, v9, v8;
+#  define VPOPCNTD_V8_V8	vpopcntd v8, v8;
+#  define VADDUQM_V7_V8	vadduqm	v9, v7, v8;
+#else
+#  define VCLZD_V8_v7	.long	0x11003fc2
+#  define MFVRD_R3_V1	.long	0x7c230067
+#  define VSUBUDM_V9_V8	.long	0x112944c0
+#  define VPOPCNTD_V8_V8	.long	0x110047c3
+#  define VADDUQM_V7_V8	.long	0x11274100
+#endif
+
+	.machine  power7
+
+ENTRY (__STRCASECMP)
+#ifdef USE_AS_STRNCASECMP
+	CALL_MCOUNT 3
+#else
+	CALL_MCOUNT 2
+#endif
+#define rRTN	r3	/* Return value */
+#define rSTR1	r10	/* 1st string */
+#define rSTR2	r4	/* 2nd string */
+#define rCHAR1	r6	/* Byte read from 1st string */
+#define rCHAR2	r7	/* Byte read from 2nd string */
+#define rADDR1	r8	/* Address of tolower(rCHAR1) */
+#define rADDR2	r12	/* Address of tolower(rCHAR2) */
+#define rLWR1	r8	/* Word tolower(rCHAR1) */
+#define rLWR2	r12	/* Word tolower(rCHAR2) */
+#define rTMP	r9
+#define rLOC	r11	/* Default locale address */
+
+	cmpd	cr7, rRTN, rSTR2
+
+	/* Get locale address.  */
+	ld 	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
+	add 	rLOC, rTMP, __libc_tsd_LOCALE@tls
+	ld	rLOC, 0(rLOC)
+
+	mr	rSTR1, rRTN
+	li	rRTN, 0
+	beqlr	cr7
+#ifdef USE_AS_STRNCASECMP
+	cmpdi	cr7, r5, 0
+	beq	cr7, L(retnull)
+	cmpdi	cr7, r5, 16
+	blt	cr7, L(bytebybyte)
+#endif
+	vspltisb	v0, 0
+	vspltisb	v8, -1
+	/* Check for null in initial characters.
+	   Check max of 16 char depending on the alignment.
+	   If null is present, proceed byte by byte.  */
+	lvx	v4, 0, rSTR1
+#ifdef  __LITTLE_ENDIAN__
+	lvsr	v10, 0, rSTR1	/* Compute mask.  */
+	vperm	v9, v8, v4, v10	/* Mask bits that are not part of string.  */
+#else
+	lvsl	v10, 0, rSTR1
+	vperm	v9, v4, v8, v10
+#endif
+	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
+	bne	cr6, L(bytebybyte)
+	lvx	v5, 0, rSTR2
+	/* Calculate alignment.  */
+#ifdef __LITTLE_ENDIAN__
+	lvsr	v6, 0, rSTR2
+	vperm	v9, v8, v5, v6	/* Mask bits that are not part of string.  */
+#else
+	lvsl	v6, 0, rSTR2
+	vperm	v9, v5, v8, v6
+#endif
+	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
+	bne	cr6, L(bytebybyte)
+	/* Check if locale has non ascii characters.  */
+	ld	rTMP, 0(rLOC)
+	addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
+	lwz	rTMP, 0(r6)
+	cmpdi	cr7, rTMP, 1
+	beq	cr7, L(bytebybyte)
+
+	/* Load vector registers with values used for TOLOWER.  */
+	/* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte.  */
+	vspltisb	v3, 2
+	vspltisb	v9, 4
+	vsl	v3, v3, v9
+	vaddubm	v1, v3, v3
+	vnor	v1, v1, v1
+	vspltisb	v2, 7
+	vsububm	v2, v3, v2
+
+	andi.	rADDR1, rSTR1, 0xF
+	beq	cr0, L(align)
+	addi	r6, rSTR1, 16
+	lvx	v9, 0, r6
+	/* Compute 16 bytes from previous two loads.  */
+#ifdef __LITTLE_ENDIAN__
+	vperm	v4, v9, v4, v10
+#else
+	vperm	v4, v4, v9, v10
+#endif
+L(align):
+	andi.	rADDR2, rSTR2, 0xF
+	beq	cr0, L(align1)
+	addi	r6, rSTR2, 16
+	lvx	v9, 0, r6
+	/* Compute 16 bytes from previous two loads.  */
+#ifdef __LITTLE_ENDIAN__
+	vperm	v5, v9, v5, v6
+#else
+	vperm	v5, v5, v9, v6
+#endif
+L(align1):
+	CHECKNULLANDCONVERT()
+	blt	cr6, L(match)
+	b	L(different)
+	.align 	4
+L(match):
+	clrldi	r6, rSTR1, 60
+	subfic	r7, r6, 16
+#ifdef USE_AS_STRNCASECMP
+	sub	r5, r5, r7
+#endif
+	add	rSTR1, rSTR1, r7
+	add	rSTR2, rSTR2, r7
+	andi.	rADDR2, rSTR2, 0xF
+	addi	rSTR1, rSTR1, -16
+	addi	rSTR2, rSTR2, -16
+	beq	cr0, L(aligned)
+#ifdef __LITTLE_ENDIAN__
+	lvsr	v6, 0, rSTR2
+#else
+	lvsl	v6, 0, rSTR2
+#endif
+	/* There are 2 loops depending on the input alignment.
+	   Each loop gets 16 bytes from s1 and s2, checks for null,
+	   converts to lowercase and compares.  Loop until a difference
+	   or null occurs.  */
+L(s1_align):
+	addi	rSTR1, rSTR1, 16
+	addi	rSTR2, rSTR2, 16
+#ifdef USE_AS_STRNCASECMP
+	cmpdi	cr7, r5, 16
+	blt	cr7, L(bytebybyte)
+	addi	r5, r5, -16
+#endif
+	lvx	v4, 0, rSTR1
+	GET16BYTES(v5, rSTR2, v6)
+	CHECKNULLANDCONVERT()
+	blt	cr6, L(s1_align)
+	b	L(different)
+	.align 	4
+L(aligned):
+	addi	rSTR1, rSTR1, 16
+	addi	rSTR2, rSTR2, 16
+#ifdef USE_AS_STRNCASECMP
+	cmpdi	cr7, r5, 16
+	blt	cr7, L(bytebybyte)
+	addi	r5, r5, -16
+#endif
+	lvx	v4, 0, rSTR1
+	lvx	v5, 0, rSTR2
+	CHECKNULLANDCONVERT()
+	blt	cr6, L(aligned)
+
+	/* Calculate and return the difference. */
+L(different):
+	vaddubm	v1, v3, v3
+	vcmpequb	v7, v0, v7
+#ifdef __LITTLE_ENDIAN__
+	/* Count trailing zero.  */
+	vspltisb	v8, -1
+	VADDUQM_V7_V8
+	vandc	v8, v9, v7
+	VPOPCNTD_V8_V8
+	vspltb	v6, v8, 15
+	vcmpequb.	v6, v6, v1
+	blt	cr6, L(shift8)
+#else
+	/* Count leading zero.  */
+	VCLZD_V8_v7
+	vspltb	v6, v8, 7
+	vcmpequb.	v6, v6, v1
+	blt	cr6, L(shift8)
+	vsro	v8, v8, v1
+#endif
+	b	L(skipsum)
+	.align  4
+L(shift8):
+	vsumsws		v8, v8, v0
+L(skipsum):
+#ifdef __LITTLE_ENDIAN__
+	/* Shift registers based on leading zero count.  */
+	vsro	v6, v5, v8
+	vsro	v7, v4, v8
+	/* Merge and move to GPR.  */
+	vmrglb	v6, v6, v7
+	vslo	v1, v6, v1
+	MFVRD_R3_V1
+	/* Place the characters that are different in first position.  */
+	sldi	rSTR2, rRTN, 56
+	srdi	rSTR2, rSTR2, 56
+	sldi	rSTR1, rRTN, 48
+	srdi	rSTR1, rSTR1, 56
+#else
+	vslo	v6, v5, v8
+	vslo	v7, v4, v8
+	vmrghb	v1, v6, v7
+	MFVRD_R3_V1
+	srdi	rSTR2, rRTN, 48
+	sldi	rSTR2, rSTR2, 56
+	srdi	rSTR2, rSTR2, 56
+	srdi	rSTR1, rRTN, 56
+#endif
+	subf  	rRTN, rSTR1, rSTR2
+	extsw 	rRTN, rRTN
+	blr
+
+	.align  4
+	/* OK. We've hit the end of the string. We need to be careful that
+	   we don't compare two strings as different because of junk beyond
+	   the end of the strings...  */
+L(null_found):
+	vaddubm	v10, v3, v3
+#ifdef __LITTLE_ENDIAN__
+	/* Count trailing zero.  */
+	vspltisb	v8, -1
+	VADDUQM_V7_V8
+	vandc	v8, v9, v7
+	VPOPCNTD_V8_V8
+	vspltb	v6, v8, 15
+	vcmpequb.	v6, v6, v10
+	blt	cr6, L(shift_8)
+#else
+	/* Count leading zero.  */
+	VCLZD_V8_v7
+	vspltb	v6, v8, 7
+	vcmpequb.	v6, v6, v10
+	blt	cr6, L(shift_8)
+	vsro	v8, v8, v10
+#endif
+	b	L(skipsum1)
+	.align  4
+L(shift_8):
+	vsumsws	v8, v8, v0
+L(skipsum1):
+	/* Calculate shift count based on count of zero.  */
+	vspltisb	v10, 7
+	vslb	v10, v10, v10
+	vsldoi	v9, v0, v10, 1
+	VSUBUDM_V9_V8
+	vspltisb	v8, 8
+	vsldoi	v8, v0, v8, 1
+	VSUBUDM_V9_V8
+	/* Shift and remove junk after null character.  */
+#ifdef __LITTLE_ENDIAN__
+	vslo	v5, v5, v9
+	vslo	v4, v4, v9
+#else
+	vsro	v5, v5, v9
+	vsro	v4, v4, v9
+#endif
+	/* Convert and compare 16 bytes.  */
+	TOLOWER()
+	blt	cr6, L(retnull)
+	b	L(different)
+	.align  4
+L(retnull):
+	li	rRTN, 0
+	blr
+	.align  4
+L(bytebybyte):
+	/* Unrolling loop for POWER: loads are done with 'lbz' plus
+	offset and string descriptors are only updated in the end
+	of loop unrolling. */
+	ld	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
+	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
+	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
+#ifdef USE_AS_STRNCASECMP
+	rldicl	rTMP, r5, 62, 2
+	cmpdi	cr7, rTMP, 0
+	beq	cr7, L(lessthan4)
+	mtctr	rTMP
+#endif
+L(loop):
+	cmpdi	rCHAR1, 0		/* *s1 == '\0' ? */
+	sldi	rADDR1, rCHAR1, 2	/* Calculate address for tolower(*s1) */
+	sldi	rADDR2, rCHAR2, 2	/* Calculate address for tolower(*s2) */
+	lwzx	rLWR1, rLOC, rADDR1	/* Load tolower(*s1) */
+	lwzx	rLWR2, rLOC, rADDR2	/* Load tolower(*s2) */
+	cmpw	cr1, rLWR1, rLWR2	/* r = tolower(*s1) == tolower(*s2) ? */
+	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
+	beq	cr1, L(done)
+	lbz	rCHAR1, 1(rSTR1)
+	lbz	rCHAR2, 1(rSTR2)
+	cmpdi	rCHAR1, 0
+	sldi	rADDR1, rCHAR1, 2
+	sldi	rADDR2, rCHAR2, 2
+	lwzx	rLWR1, rLOC, rADDR1
+	lwzx	rLWR2, rLOC, rADDR2
+	cmpw	cr1, rLWR1, rLWR2
+	crorc	4*cr1+eq,eq,4*cr1+eq
+	beq	cr1, L(done)
+	lbz	rCHAR1, 2(rSTR1)
+	lbz	rCHAR2, 2(rSTR2)
+	cmpdi	rCHAR1, 0
+	sldi	rADDR1, rCHAR1, 2
+	sldi	rADDR2, rCHAR2, 2
+	lwzx	rLWR1, rLOC, rADDR1
+	lwzx	rLWR2, rLOC, rADDR2
+	cmpw	cr1, rLWR1, rLWR2
+	crorc	4*cr1+eq,eq,4*cr1+eq
+	beq	cr1, L(done)
+	lbz	rCHAR1, 3(rSTR1)
+	lbz	rCHAR2, 3(rSTR2)
+	cmpdi	rCHAR1, 0
+	/* Increment both string descriptors */
+	addi	rSTR1, rSTR1, 4
+	addi	rSTR2, rSTR2, 4
+	sldi	rADDR1, rCHAR1, 2
+	sldi	rADDR2, rCHAR2, 2
+	lwzx	rLWR1, rLOC, rADDR1
+	lwzx	rLWR2, rLOC, rADDR2
+	cmpw	cr1, rLWR1, rLWR2
+	crorc	4*cr1+eq,eq,4*cr1+eq
+	beq     cr1, L(done)
+	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
+	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
+#ifdef USE_AS_STRNCASECMP
+	bdnz	L(loop)
+#else
+	b	L(loop)
+#endif
+#ifdef USE_AS_STRNCASECMP
+L(lessthan4):
+	clrldi	r5, r5, 62
+	cmpdi	cr7, r5, 0
+	beq	cr7, L(retnull)
+	mtctr	r5
+L(loop1):
+	cmpdi	rCHAR1, 0
+	sldi	rADDR1, rCHAR1, 2
+	sldi	rADDR2, rCHAR2, 2
+	lwzx	rLWR1, rLOC, rADDR1
+	lwzx	rLWR2, rLOC, rADDR2
+	cmpw	cr1, rLWR1, rLWR2
+	crorc	4*cr1+eq,eq,4*cr1+eq
+	beq	cr1, L(done)
+	addi	rSTR1, rSTR1, 1
+	addi	rSTR2, rSTR2, 1
+	lbz	rCHAR1, 0(rSTR1)
+	lbz	rCHAR2, 0(rSTR2)
+	bdnz	L(loop1)
+#endif
+L(done):
+	subf	r0, rLWR2, rLWR1
+	extsw	rRTN, r0
+	blr
+END (__STRCASECMP)
+
+weak_alias (__STRCASECMP, STRCASECMP)
+libc_hidden_builtin_def (__STRCASECMP)
diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S
new file mode 100644
index 0000000..7ce2ed0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strncase.S
@@ -0,0 +1,20 @@
+/* Optimized strncasecmp implementation for POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define USE_AS_STRNCASECMP 1
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
  2016-04-29  8:51 [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8 Rajalakshmi Srinivasaraghavan
@ 2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
  2016-06-14  9:46   ` Rajalakshmi Srinivasaraghavan
  0 siblings, 1 reply; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-06-13 19:14 UTC (permalink / raw)
  To: Rajalakshmi Srinivasaraghavan, libc-alpha

Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:

> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
> new file mode 100644
> index 0000000..90f75ce
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
> @@ -0,0 +1,25 @@
> +/* Multiarch strcasecmp for PPC64.
> +   Copyright (C) 2016 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <string.h>
> +
> +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;

Can't you remove this include and this prototype?

> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> index 1f22336..2b554dc 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
> @@ -1,4 +1,4 @@
> -/* Multiple versions of strcasecmp.
> +/* Multiple versions of strcasecmp
>     Copyright (C) 2013-2016 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
> @@ -16,25 +16,22 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>
> -#if IS_IN (libc)
> -# include <string.h>
> -# define strcasecmp __strcasecmp_ppc
> -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
> -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
> -#endif
> +#include <string.h>
> +#include <shlib-compat.h>
> +#include "init-arch.h"
>
> -#include <string/strcasecmp.c>
>  #undef strcasecmp

Can't you remove this undef too?

> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
> new file mode 100644
> index 0000000..3123965
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
> @@ -0,0 +1,25 @@
> +/* Multiarch strncasecmp for PPC64.
> +   Copyright (C) 2016 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <string.h>
> +
> +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;

Likewise.

> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> index 2729fce..7b8e7d3 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
> @@ -16,26 +16,22 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>
> -#if IS_IN (libc)
> -# include <string.h>
> -# define strncasecmp __strncasecmp_ppc
> -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
> -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
> -#endif
> +#include <string.h>
> +#include <shlib-compat.h>
> +#include "init-arch.h"
>
> -#include <string/strncase.c>
>  #undef strncasecmp

Likewise

LGTM after removing those lines.

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
  2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
@ 2016-06-14  9:46   ` Rajalakshmi Srinivasaraghavan
  2016-07-04 14:16     ` Florian Weimer
  0 siblings, 1 reply; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-06-14  9:46 UTC (permalink / raw)
  To: libc-alpha; +Cc: tuliom



On 06/14/2016 12:43 AM, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
>> new file mode 100644
>> index 0000000..90f75ce
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
>> @@ -0,0 +1,25 @@
>> +/* Multiarch strcasecmp for PPC64.
>> +   Copyright (C) 2016 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include <string.h>
>> +
>> +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
> Can't you remove this include and this prototype?
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> index 1f22336..2b554dc 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
>> @@ -1,4 +1,4 @@
>> -/* Multiple versions of strcasecmp.
>> +/* Multiple versions of strcasecmp
>>      Copyright (C) 2013-2016 Free Software Foundation, Inc.
>>      This file is part of the GNU C Library.
>>
>> @@ -16,25 +16,22 @@
>>      License along with the GNU C Library; if not, see
>>      <http://www.gnu.org/licenses/>.  */
>>
>> -#if IS_IN (libc)
>> -# include <string.h>
>> -# define strcasecmp __strcasecmp_ppc
>> -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
>> -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
>> -#endif
>> +#include <string.h>
>> +#include <shlib-compat.h>
>> +#include "init-arch.h"
>>
>> -#include <string/strcasecmp.c>
>>   #undef strcasecmp
> Can't you remove this undef too?
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
>> new file mode 100644
>> index 0000000..3123965
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
>> @@ -0,0 +1,25 @@
>> +/* Multiarch strncasecmp for PPC64.
>> +   Copyright (C) 2016 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include <string.h>
>> +
>> +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
> Likewise.
>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> index 2729fce..7b8e7d3 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
>> @@ -16,26 +16,22 @@
>>      License along with the GNU C Library; if not, see
>>      <http://www.gnu.org/licenses/>.  */
>>
>> -#if IS_IN (libc)
>> -# include <string.h>
>> -# define strncasecmp __strncasecmp_ppc
>> -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
>> -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
>> -#endif
>> +#include <string.h>
>> +#include <shlib-compat.h>
>> +#include "init-arch.h"
>>
>> -#include <string/strncase.c>
>>   #undef strncasecmp
> Likewise
>
> LGTM after removing those lines.
Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
after removing those lines.
>
>

-- 
Thanks
Rajalakshmi S

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8
  2016-06-14  9:46   ` Rajalakshmi Srinivasaraghavan
@ 2016-07-04 14:16     ` Florian Weimer
  2016-07-05 12:09       ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
  0 siblings, 1 reply; 11+ messages in thread
From: Florian Weimer @ 2016-07-04 14:16 UTC (permalink / raw)
  To: Rajalakshmi Srinivasaraghavan, libc-alpha; +Cc: tuliom

On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:

> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
> after removing those lines.

This appears to have caused bug 20327.

Could you have a look?

Thanks,
Florian

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-04 14:16     ` Florian Weimer
@ 2016-07-05 12:09       ` Rajalakshmi Srinivasaraghavan
  2016-07-05 12:15         ` Florian Weimer
  2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
  0 siblings, 2 replies; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-07-05 12:09 UTC (permalink / raw)
  To: Florian Weimer, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 363 bytes --]



On 07/04/2016 07:46 PM, Florian Weimer wrote:
> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>
>> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
>> after removing those lines.
>
> This appears to have caused bug 20327.
>
> Could you have a look?
>
> Thanks,
> Florian
>
>
I have fixed it in the attached patch.

-- 
Thanks
Rajalakshmi S


[-- Attachment #2: 0001-POWER8-Fix-return-code-of-strcasecmp-for-unaligned-i.patch --]
[-- Type: text/x-patch, Size: 1490 bytes --]

From 42c2aeecdb371b7544b81cb09cfcb12c2f873c72 Mon Sep 17 00:00:00 2001
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date: Tue, 5 Jul 2016 04:41:51 -0400
Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs

If the input values are unaligned and there are null characters in the
memory before the starting address of the input values, strcasecmp
returns an incorrect result.  Fix it by masking the bits that are not
part of the string.

Tested on ppc64 and ppc64le.

	[BZ #20327]
	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
	are not part of the string.
---
 sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
index 63f6217..d6a4df2 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
@@ -44,7 +44,9 @@
 #ifdef __LITTLE_ENDIAN__
 #define GET16BYTES(reg1, reg2, reg3) \
 	lvx	reg1, 0, reg2; \
-	vcmpequb.	v8, v0, reg1; \
+	vspltisb	v8, -1; \
+	vperm	v8, v8, reg1, reg3; \
+	vcmpequb.	v8, v0, v8; \
 	beq	cr6, 1f; \
 	vspltisb	v9, 0; \
 	b	2f; \
@@ -57,7 +59,9 @@
 #else
 #define GET16BYTES(reg1, reg2, reg3) \
 	lvx	reg1, 0, reg2; \
-	vcmpequb.	v8, v0, reg1; \
+	vspltisb	 v8, -1; \
+	vperm	v8, reg1, v8,  reg3; \
+	vcmpequb.	v8, v0, v8; \
 	beq	cr6, 1f; \
 	vspltisb	v9, 0; \
 	b	2f; \
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 12:09       ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
@ 2016-07-05 12:15         ` Florian Weimer
  2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
  1 sibling, 0 replies; 11+ messages in thread
From: Florian Weimer @ 2016-07-05 12:15 UTC (permalink / raw)
  To: Rajalakshmi Srinivasaraghavan; +Cc: libc-alpha

On 07/05/2016 02:09 PM, Rajalakshmi Srinivasaraghavan wrote:

> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>>
>>> Committed as c8376f3e07602aaef9cb843bb73cb5f2b860634a
>>> after removing those lines.
>>
>> This appears to have caused bug 20327.
>>
>> Could you have a look?
>>
>> Thanks,
>> Florian
>>
>>
> I have fixed it in the attached patch.

I can confirm this addresses the issue we saw.  Feel free to push if you 
feel confident about this change. :)

Thanks,
Florian

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 12:09       ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
  2016-07-05 12:15         ` Florian Weimer
@ 2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
  2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
                             ` (2 more replies)
  1 sibling, 3 replies; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-07-05 14:01 UTC (permalink / raw)
  To: Rajalakshmi Srinivasaraghavan, Florian Weimer, libc-alpha,
	Adhemerval Zanella

Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:

> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs

Could you replace POWER8 by powerpc, please?

> If the input values are unaligned and if there are null characters in the
> memory before the starting address of the input values, strcasecmp
> gives incorrect return code. Fixed it by adding mask the bits that
> are not part of the string.
>
> Tested on ppc64 and ppc64le.

Despite this being a bug fix, I believe we need the approval from Adhemerval
before integrating it during the freeze window.

> 	[BZ #20327]
> 	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
> 	are not part of the string.

This is a very important case.  Can we improve the current testcase to
validate this scenario too?

> ---
>  sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> index 63f6217..d6a4df2 100644
> --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
> @@ -44,7 +44,9 @@
>  #ifdef __LITTLE_ENDIAN__
>  #define GET16BYTES(reg1, reg2, reg3) \
>  	lvx	reg1, 0, reg2; \
> -	vcmpequb.	v8, v0, reg1; \
> +	vspltisb	v8, -1; \
> +	vperm	v8, v8, reg1, reg3; \
> +	vcmpequb.	v8, v0, v8; \
>  	beq	cr6, 1f; \
>  	vspltisb	v9, 0; \
>  	b	2f; \
> @@ -57,7 +59,9 @@
>  #else
>  #define GET16BYTES(reg1, reg2, reg3) \
>  	lvx	reg1, 0, reg2; \
> -	vcmpequb.	v8, v0, reg1; \
> +	vspltisb	 v8, -1; \
> +	vperm	v8, reg1, v8,  reg3; \
> +	vcmpequb.	v8, v0, v8; \
>  	beq	cr6, 1f; \
>  	vspltisb	v9, 0; \
>  	b	2f; \

Although this code is simple, I believe this macro needs more comments.

I suggest explaining the following:
 - How does this macro use reg1, reg2, reg3 and v8?
 - Why is it setting v9 to 0?

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
@ 2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
  2016-07-05 15:34             ` Florian Weimer
  2016-07-05 14:33           ` Adhemerval Zanella
  2016-07-05 16:10           ` Rajalakshmi Srinivasaraghavan
  2 siblings, 1 reply; 11+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-07-05 14:05 UTC (permalink / raw)
  To: Rajalakshmi Srinivasaraghavan, Florian Weimer, libc-alpha,
	Adhemerval Zanella

Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> writes:

> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> 	[BZ #20327]
>> 	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>> 	are not part of the string.
>
> This is a very important case.  Can we improve the current testcase to
> validate this scenario too?

Ooops.  I just read in the bug report that Florian is already working on this,
so feel free to ignore this.

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
  2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
@ 2016-07-05 14:33           ` Adhemerval Zanella
  2016-07-05 16:10           ` Rajalakshmi Srinivasaraghavan
  2 siblings, 0 replies; 11+ messages in thread
From: Adhemerval Zanella @ 2016-07-05 14:33 UTC (permalink / raw)
  To: Tulio Magno Quites Machado Filho, Rajalakshmi Srinivasaraghavan,
	Florian Weimer, libc-alpha



On 05/07/2016 11:01, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
> 
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
> 
> Could you replace POWER8 by powerpc, please?
> 
>> If the input values are unaligned and if there are null characters in the
>> memory before the starting address of the input values, strcasecmp
>> gives incorrect return code. Fixed it by adding mask the bits that
>> are not part of the string.
>>
>> Tested on ppc64 and ppc64le.
> 
> Despite this being a bug fix, I believe we need the approval from Adhemerval
> before integrating it during the freeze window.
> 

Bugfixes are ok for current phase and I was sorting out the release blockers
yesterday.  I will send a message about it in a couple of hours.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
@ 2016-07-05 15:34             ` Florian Weimer
  0 siblings, 0 replies; 11+ messages in thread
From: Florian Weimer @ 2016-07-05 15:34 UTC (permalink / raw)
  To: Tulio Magno Quites Machado Filho, Rajalakshmi Srinivasaraghavan,
	libc-alpha, Adhemerval Zanella

On 07/05/2016 04:05 PM, Tulio Magno Quites Machado Filho wrote:
> Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> writes:
>
>> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>>
>>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>>> 	[BZ #20327]
>>> 	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>>> 	are not part of the string.
>>
>> This is a very important case.  Can we improve the current testcase to
>> validate this scenario too?
>
> Ooops.  I just read in the bug report that Florian is already working on this,
> so feel free to ignore this.

Yeah, the proposed patch is here:

   <https://sourceware.org/ml/libc-alpha/2016-07/msg00126.html>

Thanks,
Florian

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327]
  2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
  2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
  2016-07-05 14:33           ` Adhemerval Zanella
@ 2016-07-05 16:10           ` Rajalakshmi Srinivasaraghavan
  2 siblings, 0 replies; 11+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-07-05 16:10 UTC (permalink / raw)
  To: libc-alpha



On 07/05/2016 07:31 PM, Tulio Magno Quites Machado Filho wrote:
> Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:
>
>> On 07/04/2016 07:46 PM, Florian Weimer wrote:
>>> On 06/14/2016 11:45 AM, Rajalakshmi Srinivasaraghavan wrote:
>> Subject: [PATCH] POWER8: Fix return code of strcasecmp for unaligned inputs
> Could you replace POWER8 by powerpc, please?
>
>> If the input values are unaligned and if there are null characters in the
>> memory before the starting address of the input values, strcasecmp
>> gives incorrect return code. Fixed it by adding mask the bits that
>> are not part of the string.
>>
>> Tested on ppc64 and ppc64le.
> Despite this being a bug fix, I believe we need the approval from Adhemerval
> before integrating it during the freeze window.
>
>> 	[BZ #20327]
>> 	* sysdeps/powerpc/powerpc64/power8/strcasecmp.S: Mask bits that
>> 	are not part of the string.
> This is a very important case.  Can we improve the current testcase to
> validate this scenario too?
>
>> ---
>>   sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 8 ++++++--
>>   1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> index 63f6217..d6a4df2 100644
>> --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
>> @@ -44,7 +44,9 @@
>>   #ifdef __LITTLE_ENDIAN__
>>   #define GET16BYTES(reg1, reg2, reg3) \
>>   	lvx	reg1, 0, reg2; \
>> -	vcmpequb.	v8, v0, reg1; \
>> +	vspltisb	v8, -1; \
>> +	vperm	v8, v8, reg1, reg3; \
>> +	vcmpequb.	v8, v0, v8; \
>>   	beq	cr6, 1f; \
>>   	vspltisb	v9, 0; \
>>   	b	2f; \
>> @@ -57,7 +59,9 @@
>>   #else
>>   #define GET16BYTES(reg1, reg2, reg3) \
>>   	lvx	reg1, 0, reg2; \
>> -	vcmpequb.	v8, v0, reg1; \
>> +	vspltisb	 v8, -1; \
>> +	vperm	v8, reg1, v8,  reg3; \
>> +	vcmpequb.	v8, v0, v8; \
>>   	beq	cr6, 1f; \
>>   	vspltisb	v9, 0; \
>>   	b	2f; \
> Although this code is simple, I believe this macro is missing more comments.
>
> I suggest to explain the following:
>   - How does this macro use reg1, reg2, reg3 and v8?
>   - Why is it setting v9 to 0?
>
Added comments and committed it as
30e4cc5413f72c2c728a544389da0c48500d9904

-- 
Thanks
Rajalakshmi S

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2016-07-05 16:10 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-29  8:51 [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 Rajalakshmi Srinivasaraghavan
2016-06-13 19:14 ` Tulio Magno Quites Machado Filho
2016-06-14  9:46   ` Rajalakshmi Srinivasaraghavan
2016-07-04 14:16     ` Florian Weimer
2016-07-05 12:09       ` [PATCH] powerpc: strcasecmp/strncasecmp optmization for power8 [BZ 20327] Rajalakshmi Srinivasaraghavan
2016-07-05 12:15         ` Florian Weimer
2016-07-05 14:01         ` Tulio Magno Quites Machado Filho
2016-07-05 14:05           ` Tulio Magno Quites Machado Filho
2016-07-05 15:34             ` Florian Weimer
2016-07-05 14:33           ` Adhemerval Zanella
2016-07-05 16:10           ` Rajalakshmi Srinivasaraghavan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).