This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.19-104-gc7debbd


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  c7debbdfacbef150aaf9113eb05ccaf2b9e7af6c (commit)
      from  235eed8644eb2deebf839b13b418dbd47bb14c5a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c7debbdfacbef150aaf9113eb05ccaf2b9e7af6c

commit c7debbdfacbef150aaf9113eb05ccaf2b9e7af6c
Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date:   Mon Mar 3 08:06:41 2014 -0600

    PowerPC: strrchr optimization for POWER7/PPC64
    
    This patch optimizes strrchr() for ppc64. It uses aligned memory
    access along with cmpb instruction and CPU prefetch to avoid
    cache misses for speed improvement.

diff --git a/ChangeLog b/ChangeLog
index 45eb5ab..b4a7daa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2014-03-03  Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/power7/strrchr.S: New file.
+	* sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strrchr multiarch
+	implementation.
+	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c:
+	(__libc_ifunc_impl_list): Likewise.
+	* sysdeps/powerpc/powerpc64/multiarch/strrchr.c: New file.
+	* sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c: New file.
+	* sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S: New file.
+	* string/strrchr.c: Define STRRCHR.
+
 2014-02-28  Ondřej Bílka  <neleai@seznam.cz>
 
 	* benchtest/bench-strtok.c (simple_strtok): Delete.
diff --git a/string/strrchr.c b/string/strrchr.c
index b5b4bc6..47ff08c 100644
--- a/string/strrchr.c
+++ b/string/strrchr.c
@@ -19,9 +19,13 @@
 
 #undef strrchr
 
+#ifndef STRRCHR
+# define STRRCHR strrchr
+#endif
+
 /* Find the last occurrence of C in S.  */
 char *
-strrchr (const char *s, int c)
+STRRCHR (const char *s, int c)
 {
   const char *found, *p;
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 3c47316..d09f2e3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -13,7 +13,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   wcschr-power6 wcschr-ppc64 wcsrchr-power7 wcsrchr-power6 \
 		   wcsrchr-ppc64 wcscpy-power7 wcscpy-power6 wcscpy-ppc64 \
 		   wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \
-		   strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64
+		   strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \
+		   strrchr-power7 strrchr-ppc64
 
 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
 CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 6bbdd4e..8789483 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -238,5 +238,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, 1,
 			      __wcscpy_ppc))
 
+  /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c.  */
+  IFUNC_IMPL (i, name, strrchr,
+	      IFUNC_IMPL_ADD (array, i, strrchr,
+			      hwcap & PPC_FEATURE_HAS_VSX,
+			      __strrchr_power7)
+	      IFUNC_IMPL_ADD (array, i, strrchr, 1,
+			      __strrchr_ppc))
+
   return i;
 }
diff --git a/string/strrchr.c b/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S
similarity index 55%
copy from string/strrchr.c
copy to sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S
index b5b4bc6..78e15e3 100644
--- a/string/strrchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+/* Optimized strrchr implementation for POWER7.
+   Copyright (C) 2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,35 +16,24 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <string.h>
+#include <sysdep.h>
 
-#undef strrchr
+#undef ENTRY
+#define ENTRY(name)						\
+  .section ".text";						\
+  ENTRY_2(__strrchr_power7)					\
+  .align ALIGNARG(2);						\
+  BODY_LABEL(__strrchr_power7):					\
+  cfi_startproc;						\
+  LOCALENTRY(__strrchr_power7)
 
-/* Find the last occurrence of C in S.  */
-char *
-strrchr (const char *s, int c)
-{
-  const char *found, *p;
+#undef END
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strrchr_power7)					\
+  END_2(__strrchr_power7)
 
-  c = (unsigned char) c;
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
 
-  /* Since strchr is fast, we use it rather than the obvious loop.  */
-
-  if (c == '\0')
-    return strchr (s, '\0');
-
-  found = NULL;
-  while ((p = strchr (s, c)) != NULL)
-    {
-      found = p;
-      s = p + 1;
-    }
-
-  return (char *) found;
-}
-
-#ifdef weak_alias
-#undef rindex
-weak_alias (strrchr, rindex)
-#endif
-libc_hidden_builtin_def (strrchr)
+#include <sysdeps/powerpc/powerpc64/power7/strrchr.S>
diff --git a/string/strrchr.c b/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c
similarity index 58%
copy from string/strrchr.c
copy to sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c
index b5b4bc6..5633a9f 100644
--- a/string/strrchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+/* Copyright (C) 2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,33 +17,17 @@
 
 #include <string.h>
 
-#undef strrchr
-
-/* Find the last occurrence of C in S.  */
-char *
-strrchr (const char *s, int c)
-{
-  const char *found, *p;
-
-  c = (unsigned char) c;
-
-  /* Since strchr is fast, we use it rather than the obvious loop.  */
-
-  if (c == '\0')
-    return strchr (s, '\0');
-
-  found = NULL;
-  while ((p = strchr (s, c)) != NULL)
-    {
-      found = p;
-      s = p + 1;
-    }
+#define STRRCHR __strrchr_ppc
+#undef weak_alias
+#define weak_alias(name, aliasname) \
+  extern __typeof (__strrchr_ppc) aliasname \
+    __attribute__ ((weak, alias ("__strrchr_ppc")));
+#if !defined(NOT_IN_libc) && defined(SHARED)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+  __hidden_ver1(__strrchr_ppc, __GI_strrchr, __strrchr_ppc);
+#endif
 
-  return (char *) found;
-}
+extern __typeof (strrchr) __strrchr_ppc attribute_hidden;
 
-#ifdef weak_alias
-#undef rindex
-weak_alias (strrchr, rindex)
-#endif
-libc_hidden_builtin_def (strrchr)
+#include <string/strrchr.c>
diff --git a/string/strrchr.c b/sysdeps/powerpc/powerpc64/multiarch/strrchr.c
similarity index 56%
copy from string/strrchr.c
copy to sysdeps/powerpc/powerpc64/multiarch/strrchr.c
index b5b4bc6..046162f 100644
--- a/string/strrchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+/* Multiple versions of strrchr. PowerPC64 version.
+   Copyright (C) 2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,35 +16,20 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <string.h>
-
-#undef strrchr
-
-/* Find the last occurrence of C in S.  */
-char *
-strrchr (const char *s, int c)
-{
-  const char *found, *p;
-
-  c = (unsigned char) c;
-
-  /* Since strchr is fast, we use it rather than the obvious loop.  */
-
-  if (c == '\0')
-    return strchr (s, '\0');
-
-  found = NULL;
-  while ((p = strchr (s, c)) != NULL)
-    {
-      found = p;
-      s = p + 1;
-    }
-
-  return (char *) found;
-}
-
-#ifdef weak_alias
-#undef rindex
+/* Define multiple versions only for definition in libc.  */
+#ifndef NOT_IN_libc
+# include <string.h>
+# include <shlib-compat.h>
+# include "init-arch.h"
+
+extern __typeof (strrchr) __strrchr_ppc attribute_hidden;
+extern __typeof (strrchr) __strrchr_power7 attribute_hidden;
+
+/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
+   ifunc symbol properly.  */
+libc_ifunc (strrchr,
+            (hwcap & PPC_FEATURE_HAS_VSX)
+            ? __strrchr_power7
+            : __strrchr_ppc);
 weak_alias (strrchr, rindex)
 #endif
-libc_hidden_builtin_def (strrchr)
diff --git a/sysdeps/powerpc/powerpc64/power7/strrchr.S b/sysdeps/powerpc/powerpc64/power7/strrchr.S
new file mode 100644
index 0000000..e4a76c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strrchr.S
@@ -0,0 +1,255 @@
+/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* char * [r3] strrchr (char *s [r3], int c [r4])  */
+	.machine  power7
+ENTRY (strrchr)
+	CALL_MCOUNT 2
+	dcbt	0,r3
+	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
+	cmpdi	cr7,r4,0
+	ld	r12,0(r8)     /* Load doubleword from memory.  */
+	li	r9,0	      /* Used to store the last occurrence.  */
+	li	r0,0	      /* Doubleword with null chars to use
+				 with cmpb.  */
+
+	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
+
+	beq	cr7,L(null_match)
+
+	/* Replicate byte to doubleword.  */
+	insrdi	r4,r4,8,48
+	insrdi	r4,r4,16,32
+	insrdi	r4,r4,32,0
+
+	/* r4 has changed now; if c was passed with more than one char's
+	   worth of bits, r4 may have become zero, so check for null again.  */
+	cmpdi	cr7,r4,0
+	beq	cr7,L(null_match)
+	/* Now r4 has a doubleword of c bytes and r0 has
+	   a doubleword of null bytes.  */
+
+	cmpb	r10,r12,r4     /* Compare each byte against c byte.  */
+	cmpb	r11,r12,r0     /* Compare each byte against null byte.  */
+
+	/* Move the doublewords left and right to discard the bits that are
+	   not part of the string and bring them back as zeros.  */
+#ifdef __LITTLE_ENDIAN__
+	srd	r10,r10,r6
+	srd	r11,r11,r6
+	sld	r10,r10,r6
+	sld	r11,r11,r6
+#else
+	sld	r10,r10,r6
+	sld	r11,r11,r6
+	srd	r10,r10,r6
+	srd	r11,r11,r6
+#endif
+	or	r5,r10,r11    /* OR the results to speed things up.  */
+	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
+				 have been found.  */
+	bne	cr7,L(done)
+
+L(align):
+	mtcrf	0x01,r8
+
+	/* Are we now aligned to a quadword boundary?  If so, skip to
+	   the main loop.  Otherwise, go through the alignment code.  */
+
+	bt	28,L(loop)
+
+	/* Handle WORD2 of pair.  */
+	ldu	r12,8(r8)
+	cmpb	r10,r12,r4
+	cmpb	r11,r12,r0
+	or	r5,r10,r11
+	cmpdi	cr7,r5,0
+	bne	cr7,L(done)
+	b	L(loop)	      /* We branch here (rather than falling through)
+				 to skip the nops due to heavy alignment
+				 of the loop below.  */
+	.p2align  5
+L(loop):
+	/* Load two doublewords, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the null-checking process for bigger strings.  */
+	ld	r12,8(r8)
+	ldu	r7,16(r8)
+	cmpb	r10,r12,r4
+	cmpb	r11,r12,r0
+	cmpb	r6,r7,r4
+	cmpb	r7,r7,r0
+	or	r12,r10,r11
+	or	r5,r6,r7
+	or	r5,r12,r5
+	cmpdi	cr7,r5,0
+	beq	cr7,L(loop)
+
+	/* OK, one (or both) of the doublewords contains a c/null byte.  Check
+	   the first doubleword and decrement the address in case the first
+	   doubleword really contains a c/null byte.  */
+	cmpdi	cr6,r12,0
+	addi	r8,r8,-8
+	bne	cr6,L(done)
+
+	/* The c/null byte must be in the second doubleword.  Adjust the
+	   address again and move the result of cmpb to r10 so we can calculate
+	   the pointer.  */
+
+	mr	r10,r6
+	mr	r11,r7
+	addi	r8,r8,8
+
+	/* r10/r11 have the output of the cmpb instructions, that is,
+	   0xff in the same position as the c/null byte in the original
+	   doubleword from the string.  Use that to calculate the pointer.  */
+
+L(done):
+	/* If r11 contains one or more 0xff bytes, find the position of the
+	   first 0xff in r11 and fill r10 with zeros from that position.  */
+	cmpdi	cr7,r11,0
+	beq	cr7,L(no_null)
+#ifdef __LITTLE_ENDIAN__
+	addi	r3,r11,-1
+	andc	r3,r3,r11
+	popcntd r0,r3
+#else
+	cntlzd	r0,r11
+#endif
+	subfic	r0,r0,63
+	li	r6,-1
+#ifdef __LITTLE_ENDIAN__
+	srd	r0,r6,r0
+#else
+	sld	r0,r6,r0
+#endif
+	and	r10,r0,r10
+L(no_null):
+#ifdef __LITTLE_ENDIAN__
+	cntlzd	r0,r10		/* Count leading zeros before c matches.  */
+	addi	r3,r10,-1
+	andc	r3,r3,r10
+	addi	r10,r11,-1
+	andc	r10,r10,r11
+	cmpld	cr7,r3,r10
+	bgt	cr7,L(no_match)
+#else
+	addi	r3,r10,-1	/* Count trailing zeros before c matches.  */
+	andc	r3,r3,r10
+	popcntd	r0,r3
+	cmpld	cr7,r11,r10
+	bgt	cr7,L(no_match)
+#endif
+	srdi	r0,r0,3		/* Convert trailing zeros to bytes.  */
+	subfic	r0,r0,7
+	add	r9,r8,r0      /* Return address of the matching c byte
+				 or null in case c was not found.  */
+	li	r0,0
+	cmpdi	cr7,r11,0     /* If r11 == 0, no null's have been found.  */
+	beq	cr7,L(align)
+
+	.align	4
+L(no_match):
+	mr	r3,r9
+	blr
+
+/* We are here because strrchr was called with a null byte.  */
+	.align	4
+L(null_match):
+	/* r0 has a doubleword of null bytes.  */
+
+	cmpb	r5,r12,r0     /* Compare each byte against null bytes.  */
+
+	/* Move the doublewords left and right to discard the bits that are
+	   not part of the string and bring them back as zeros.  */
+#ifdef __LITTLE_ENDIAN__
+	srd	r5,r5,r6
+	sld	r5,r5,r6
+#else
+	sld	r5,r5,r6
+	srd	r5,r5,r6
+#endif
+	cmpdi	cr7,r5,0      /* If r5 == 0, no null bytes
+				 have been found.  */
+	bne	cr7,L(done_null)
+
+	mtcrf	0x01,r8
+
+	/* Are we now aligned to a quadword boundary?  If so, skip to
+	   the main loop.  Otherwise, go through the alignment code.  */
+
+	bt	28,L(loop_null)
+
+	/* Handle WORD2 of pair.  */
+	ldu	r12,8(r8)
+	cmpb	r5,r12,r0
+	cmpdi	cr7,r5,0
+	bne	cr7,L(done_null)
+	b	L(loop_null)  /* We branch here (rather than falling through)
+				 to skip the nops due to heavy alignment
+				 of the loop below.  */
+
+	/* Main loop to look for the end of the string.  Since it's a
+	   small loop (< 8 instructions), align it to 32-bytes.  */
+	.p2align  5
+L(loop_null):
+	/* Load two doublewords, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the null-checking process for bigger strings.  */
+	ld	r12,8(r8)
+	ldu	r11,16(r8)
+	cmpb	r5,r12,r0
+	cmpb	r10,r11,r0
+	or	r6,r5,r10
+	cmpdi	cr7,r6,0
+	beq	cr7,L(loop_null)
+
+	/* OK, one (or both) of the doublewords contains a null byte.  Check
+	   the first doubleword and decrement the address in case the first
+	   doubleword really contains a null byte.  */
+
+	cmpdi	cr6,r5,0
+	addi	r8,r8,-8
+	bne	cr6,L(done_null)
+
+	/* The null byte must be in the second doubleword.  Adjust the address
+	   again and move the result of cmpb to r10 so we can calculate the
+	   pointer.  */
+
+	mr	r5,r10
+	addi	r8,r8,8
+
+	/* r5 has the output of the cmpb instruction, that is, it contains
+	   0xff in the same position as the null byte in the original
+	   doubleword from the string.  Use that to calculate the pointer.  */
+L(done_null):
+#ifdef __LITTLE_ENDIAN__
+	addi	r0,r5,-1
+	andc	r0,r0,r5
+	popcntd	r0,r0
+#else
+	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
+#endif
+	srdi	r0,r0,3	      /* Convert the zero-bit count to bytes.  */
+	add	r3,r8,r0      /* Return address of the matching null byte.  */
+	blr
+END (strrchr)
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   12 +
 string/strrchr.c                                   |    6 +-
 sysdeps/powerpc/powerpc64/multiarch/Makefile       |    3 +-
 .../powerpc/powerpc64/multiarch/ifunc-impl-list.c  |    8 +
 .../powerpc/powerpc64/multiarch/strrchr-power7.S   |   39 +++
 .../powerpc/powerpc64/multiarch/strrchr-ppc64.c    |   33 +++
 sysdeps/powerpc/powerpc64/multiarch/strrchr.c      |   35 +++
 sysdeps/powerpc/powerpc64/power7/strrchr.S         |  255 ++++++++++++++++++++
 8 files changed, 389 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strrchr.c
 create mode 100644 sysdeps/powerpc/powerpc64/power7/strrchr.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]