This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.14-429-gfc2ee42


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab (commit)
      from  09229f3e1b617d9dcfa3227f32bb72436d7fcac4 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab

commit fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab
Author: Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
Date:   Sun Oct 23 15:17:23 2011 -0400

    Add optimized wcslen and strnlen for x86-32

diff --git a/ChangeLog b/ChangeLog
index 542869b..c538e40 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2011-10-23  Ulrich Drepper  <drepper@gmail.com>
+
+	* string/strnlen.c: Define and use STRNLEN macro.
+	* sysdeps/i386/i686/multiarch/Makefile [string] (sysdep_routines):
+	Add strnlen-sse2, strnlen-c, wcslen-sse2, and wcslen-c.
+	* sysdeps/i386/i686/multiarch/strlen-sse2.S: Add support for strnlen.
+	* wcsmbs/wcslen.c: Define and use WCSLEN.
+	* sysdeps/i386/i686/multiarch/strnlen-c.c: New file.
+	* sysdeps/i386/i686/multiarch/strnlen-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/strnlen.S: New file.
+	* sysdeps/i386/i686/multiarch/wcslen-c.c: New file.
+	* sysdeps/i386/i686/multiarch/wcslen-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/wcslen.S: New file.
+	Patch by Liubov Dmitrieva <liubov.dmitrieva@gmail.com>.
+
 2011-10-20  Liubov Dmitrieva  <liubov.dmitrieva@gmail.com>
 
 	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
diff --git a/NEWS b/NEWS
index ad6ddc7..e0bb0ef 100644
--- a/NEWS
+++ b/NEWS
@@ -26,8 +26,8 @@ Version 2.15
 * Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
   Contributed by HJ Lu.
 
-* Optimized strcat, strncat, wcslen, strnlen on x86-64 and optimized
-  wcscmp on x86-32 and x86-64.
+* Optimized strcat, strncat on x86-64 and optimized wcscmp, wcslen, strnlen
+  on x86-32 and x86-64.
   Contributed by Liubov Dmitrieva.
 
 * Optimized strchr and strrchr for SSE on x86-32.
diff --git a/string/strnlen.c b/string/strnlen.c
index 454257b..3f52c49 100644
--- a/string/strnlen.c
+++ b/string/strnlen.c
@@ -1,5 +1,5 @@
 /* Find the length of STRING, but scan at most MAXLEN characters.
-   Copyright (C) 1991,1993,1997,2000,2001,2005 Free Software Foundation, Inc.
+   Copyright (C) 1991, 1993, 1997, 2000, 2001, 2005, 2011 Free Software Foundation, Inc.
    Contributed by Jakub Jelinek <jakub@redhat.com>.
 
    Based on strlen written by Torbjorn Granlund (tege@sics.se),
@@ -26,8 +26,13 @@
 
 /* Find the length of S, but scan at most MAXLEN characters.  If no
    '\0' terminator is found in that many characters, return MAXLEN.  */
+
+#ifndef STRNLEN
+# define STRNLEN __strnlen
+#endif
+
 size_t
-__strnlen (const char *str, size_t maxlen)
+STRNLEN (const char *str, size_t maxlen)
 {
   const char *char_ptr, *end_ptr = str + maxlen;
   const unsigned long int *longword_ptr;
@@ -157,5 +162,7 @@ __strnlen (const char *str, size_t maxlen)
     char_ptr = end_ptr;
   return char_ptr - str;
 }
+#ifndef STRNLEN
 weak_alias (__strnlen, strnlen)
+#endif
 libc_hidden_def (strnlen)
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 98d1ad6..5f18538 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -18,6 +18,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
 		   memrchr-sse2 memrchr-sse2-bsf memrchr-c \
 		   rawmemchr-sse2 rawmemchr-sse2-bsf \
+		   strnlen-sse2 strnlen-c wcslen-sse2 wcslen-c \
 		   wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
index 2dbc4a9..91b6d79 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -18,29 +18,46 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+/* For strlen only the SHARED version is optimized; for strcat, strncat and strnlen both the STATIC and SHARED versions are optimized.  */
+
+#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+
 # ifndef USE_AS_STRCAT
 
 #  include <sysdep.h>
-#  define CFI_PUSH(REG)	\
+#  define PARMS	4
+#  define STR	PARMS
+#  define RETURN	ret
+
+#  ifdef USE_AS_STRNLEN
+#   define LEN	PARMS + 8
+#   define CFI_PUSH(REG)	\
 	cfi_adjust_cfa_offset (4);	\
 	cfi_rel_offset (REG, 0)
 
-#  define CFI_POP(REG)	\
+#   define CFI_POP(REG)	\
 	cfi_adjust_cfa_offset (-4);	\
 	cfi_restore (REG)
 
-#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#  define POP(REG)	popl REG; CFI_POP (REG)
-#  define PARMS		4
-#  define STR		PARMS
-#  define ENTRANCE
-#  define RETURN		ret
+#   define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)
+#   define POP(REG)	popl	REG;	CFI_POP (REG)
+#   undef RETURN
+#   define RETURN	POP (%edi); CFI_PUSH(%edi); ret
+#  endif
+
+#  ifndef STRLEN
+#   define STRLEN	__strlen_sse2
+#  endif
 
 	atom_text_section
-ENTRY (__strlen_sse2)
-	ENTRANCE
+ENTRY (STRLEN)
 	mov	STR(%esp), %edx
+#  ifdef USE_AS_STRNLEN
+	PUSH	(%edi)
+	movl	LEN(%esp), %edi
+	sub	$4, %edi
+	jbe	L(len_less4_prolog)
+#  endif
 # endif
 	xor	%eax, %eax
 	cmpb	$0, (%edx)
@@ -51,6 +68,12 @@ ENTRY (__strlen_sse2)
 	jz	L(exit_tail2)
 	cmpb	$0, 3(%edx)
 	jz	L(exit_tail3)
+
+# ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less8_prolog)
+# endif
+
 	cmpb	$0, 4(%edx)
 	jz	L(exit_tail4)
 	cmpb	$0, 5(%edx)
@@ -59,6 +82,12 @@ ENTRY (__strlen_sse2)
 	jz	L(exit_tail6)
 	cmpb	$0, 7(%edx)
 	jz	L(exit_tail7)
+
+# ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less12_prolog)
+# endif
+
 	cmpb	$0, 8(%edx)
 	jz	L(exit_tail8)
 	cmpb	$0, 9(%edx)
@@ -67,6 +96,12 @@ ENTRY (__strlen_sse2)
 	jz	L(exit_tail10)
 	cmpb	$0, 11(%edx)
 	jz	L(exit_tail11)
+
+# ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less16_prolog)
+# endif
+
 	cmpb	$0, 12(%edx)
 	jz	L(exit_tail12)
 	cmpb	$0, 13(%edx)
@@ -75,11 +110,18 @@ ENTRY (__strlen_sse2)
 	jz	L(exit_tail14)
 	cmpb	$0, 15(%edx)
 	jz	L(exit_tail15)
+
 	pxor	%xmm0, %xmm0
-	mov	%edx, %eax
-	lea	16(%edx), %ecx
+	lea	16(%edx), %eax
+	mov	%eax, %ecx
 	and	$-16, %eax
-	add	$16, %eax
+
+# ifdef USE_AS_STRNLEN
+	and	$15, %edx
+	add	%edx, %edi
+	sub	$64, %edi
+	jbe	L(len_less64)
+# endif
 
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
@@ -95,7 +137,6 @@ ENTRY (__strlen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)
 
-
 	pcmpeqb	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
 	pxor	%xmm3, %xmm3
@@ -109,6 +150,11 @@ ENTRY (__strlen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)
 
+# ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+# endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
@@ -133,6 +179,11 @@ ENTRY (__strlen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)
 
+# ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+# endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
@@ -157,6 +208,11 @@ ENTRY (__strlen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)
 
+# ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+# endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
 	test	%edx, %edx
@@ -181,8 +237,20 @@ ENTRY (__strlen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)
 
+# ifdef USE_AS_STRNLEN
+	mov	%eax, %edx
+	and	$63, %edx
+	add	%edx, %edi
+# endif
+
 	and	$-0x40, %eax
-L(aligned_64):
+
+	.p2align 4
+L(aligned_64_loop):
+# ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+# endif
 	movaps	(%eax), %xmm0
 	movaps	16(%eax), %xmm1
 	movaps	32(%eax), %xmm2
@@ -194,7 +262,7 @@ L(aligned_64):
 	pmovmskb %xmm2, %edx
 	test	%edx, %edx
 	lea	64(%eax), %eax
-	jz	L(aligned_64)
+	jz	L(aligned_64_loop)
 
 	pcmpeqb	-64(%eax), %xmm3
 	pmovmskb %xmm3, %edx
@@ -221,56 +289,348 @@ L(exit):
 	sub	%ecx, %eax
 	test	%dl, %dl
 	jz	L(exit_high)
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_8)
 	test	$0x01, %dl
 	jnz	L(exit_tail0)
-
 	test	$0x02, %dl
 	jnz	L(exit_tail1)
-
 	test	$0x04, %dl
 	jnz	L(exit_tail2)
+	add	$3, %eax
+	RETURN
 
-	test	$0x08, %dl
-	jnz	L(exit_tail3)
-
+	.p2align 4
+L(exit_8):
 	test	$0x10, %dl
 	jnz	L(exit_tail4)
-
 	test	$0x20, %dl
 	jnz	L(exit_tail5)
-
 	test	$0x40, %dl
 	jnz	L(exit_tail6)
 	add	$7, %eax
-L(exit_tail0):
 	RETURN
 
+	.p2align 4
 L(exit_high):
-	add	$8, %eax
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_high_8)
 	test	$0x01, %dh
+	jnz	L(exit_tail8)
+	test	$0x02, %dh
+	jnz	L(exit_tail9)
+	test	$0x04, %dh
+	jnz	L(exit_tail10)
+	add	$11, %eax
+	RETURN
+
+	.p2align 4
+L(exit_high_8):
+	test	$0x10, %dh
+	jnz	L(exit_tail12)
+	test	$0x20, %dh
+	jnz	L(exit_tail13)
+	test	$0x40, %dh
+	jnz	L(exit_tail14)
+	add	$15, %eax
+L(exit_tail0):
+	RETURN
+
+# ifdef USE_AS_STRNLEN
+
+	.p2align 4
+L(len_less64):
+	pxor	%xmm0, %xmm0
+	add	$64, %edi
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+	sub	$16, %edi
+	jbe	L(return_start_len)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+	sub	$16, %edi
+	jbe	L(return_start_len)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+	sub	$16, %edi
+	jbe	L(return_start_len)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+	movl	LEN(%esp), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit):
+	sub	%ecx, %eax
+
+	test	%dl, %dl
+	jz	L(strnlen_exit_high)
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(strnlen_exit_8)
+	test	$0x01, %dl
 	jnz	L(exit_tail0)
+	test	$0x02, %dl
+	jnz	L(strnlen_exit_tail1)
+	test	$0x04, %dl
+	jnz	L(strnlen_exit_tail2)
+	sub	$4, %edi
+	jb	L(return_start_len)
+	lea	3(%eax), %eax
+	RETURN
 
-	test	$0x02, %dh
-	jnz	L(exit_tail1)
+	.p2align 4
+L(strnlen_exit_8):
+	test	$0x10, %dl
+	jnz	L(strnlen_exit_tail4)
+	test	$0x20, %dl
+	jnz	L(strnlen_exit_tail5)
+	test	$0x40, %dl
+	jnz	L(strnlen_exit_tail6)
+	sub	$8, %edi
+	jb	L(return_start_len)
+	lea	7(%eax), %eax
+	RETURN
 
+	.p2align 4
+L(strnlen_exit_high):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(strnlen_exit_high_8)
+	test	$0x01, %dh
+	jnz	L(strnlen_exit_tail8)
+	test	$0x02, %dh
+	jnz	L(strnlen_exit_tail9)
 	test	$0x04, %dh
-	jnz	L(exit_tail2)
-
-	test	$0x08, %dh
-	jnz	L(exit_tail3)
+	jnz	L(strnlen_exit_tail10)
+	sub	$12, %edi
+	jb	L(return_start_len)
+	lea	11(%eax), %eax
+	RETURN
 
+	.p2align 4
+L(strnlen_exit_high_8):
 	test	$0x10, %dh
-	jnz	L(exit_tail4)
-
+	jnz	L(strnlen_exit_tail12)
 	test	$0x20, %dh
-	jnz	L(exit_tail5)
-
+	jnz	L(strnlen_exit_tail13)
 	test	$0x40, %dh
-	jnz	L(exit_tail6)
-	add	$7, %eax
+	jnz	L(strnlen_exit_tail14)
+	sub	$16, %edi
+	jb	L(return_start_len)
+	lea	15(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail1):
+	sub	$2, %edi
+	jb	L(return_start_len)
+	lea	1(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail2):
+	sub	$3, %edi
+	jb	L(return_start_len)
+	lea	2(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail4):
+	sub	$5, %edi
+	jb	L(return_start_len)
+	lea	4(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail5):
+	sub	$6, %edi
+	jb	L(return_start_len)
+	lea	5(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail6):
+	sub	$7, %edi
+	jb	L(return_start_len)
+	lea	6(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail8):
+	sub	$9, %edi
+	jb	L(return_start_len)
+	lea	8(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail9):
+	sub	$10, %edi
+	jb	L(return_start_len)
+	lea	9(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail10):
+	sub	$11, %edi
+	jb	L(return_start_len)
+	lea	10(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail12):
+	sub	$13, %edi
+	jb	L(return_start_len)
+	lea	12(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail13):
+	sub	$14, %edi
+	jb	L(return_start_len)
+	lea	13(%eax), %eax
 	RETURN
 
 	.p2align 4
+L(strnlen_exit_tail14):
+	sub	$15, %edi
+	jb	L(return_start_len)
+	lea	14(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(return_start_len):
+	movl	LEN(%esp), %eax
+	RETURN
+
+/* The labels below are reached only from the length checks in the prolog.  */
+
+	.p2align 4
+L(len_less4_prolog):
+	xor	%eax, %eax
+
+	add	$4, %edi
+	jz	L(exit_tail0)
+
+	cmpb	$0, (%edx)
+	jz	L(exit_tail0)
+	cmp	$1, %edi
+	je	L(exit_tail1)
+
+	cmpb	$0, 1(%edx)
+	jz	L(exit_tail1)
+	cmp	$2, %edi
+	je	L(exit_tail2)
+
+	cmpb	$0, 2(%edx)
+	jz	L(exit_tail2)
+	cmp	$3, %edi
+	je	L(exit_tail3)
+
+	cmpb	$0, 3(%edx)
+	jz	L(exit_tail3)
+	mov	$4, %eax
+	RETURN
+
+	.p2align 4
+L(len_less8_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 4(%edx)
+	jz	L(exit_tail4)
+	cmp	$1, %edi
+	je	L(exit_tail5)
+
+	cmpb	$0, 5(%edx)
+	jz	L(exit_tail5)
+	cmp	$2, %edi
+	je	L(exit_tail6)
+
+	cmpb	$0, 6(%edx)
+	jz	L(exit_tail6)
+	cmp	$3, %edi
+	je	L(exit_tail7)
+
+	cmpb	$0, 7(%edx)
+	jz	L(exit_tail7)
+	mov	$8, %eax
+	RETURN
+
+
+	.p2align 4
+L(len_less12_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 8(%edx)
+	jz	L(exit_tail8)
+	cmp	$1, %edi
+	je	L(exit_tail9)
+
+	cmpb	$0, 9(%edx)
+	jz	L(exit_tail9)
+	cmp	$2, %edi
+	je	L(exit_tail10)
+
+	cmpb	$0, 10(%edx)
+	jz	L(exit_tail10)
+	cmp	$3, %edi
+	je	L(exit_tail11)
+
+	cmpb	$0, 11(%edx)
+	jz	L(exit_tail11)
+	mov	$12, %eax
+	RETURN
+
+	.p2align 4
+L(len_less16_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 12(%edx)
+	jz	L(exit_tail12)
+	cmp	$1, %edi
+	je	L(exit_tail13)
+
+	cmpb	$0, 13(%edx)
+	jz	L(exit_tail13)
+	cmp	$2, %edi
+	je	L(exit_tail14)
+
+	cmpb	$0, 14(%edx)
+	jz	L(exit_tail14)
+	cmp	$3, %edi
+	je	L(exit_tail15)
+
+	cmpb	$0, 15(%edx)
+	jz	L(exit_tail15)
+	mov	$16, %eax
+	RETURN
+# endif
+
+	.p2align 4
 L(exit_tail1):
 	add	$1, %eax
 	RETURN
@@ -330,7 +690,7 @@ L(exit_tail14):
 L(exit_tail15):
 	add	$15, %eax
 # ifndef USE_AS_STRCAT
-	ret
-END (__strlen_sse2)
+	RETURN
+END (STRLEN)
 # endif
 #endif
diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c
new file mode 100644
index 0000000..567af2c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen-c.c
@@ -0,0 +1,8 @@
+#ifndef NOT_IN_libc
+# define STRNLEN  __strnlen_ia32
+# undef libc_hidden_builtin_def
+# define libc_hidden_def(name)  \
+    __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32);
+#endif
+
+#include "string/strnlen.c"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
new file mode 100644
index 0000000..56b6ae2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNLEN
+#define STRLEN __strnlen_sse2
+#include "strlen-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
new file mode 100644
index 0000000..7e542d9
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen.S
@@ -0,0 +1,56 @@
+/* Multiple versions of strnlen
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef  NOT_IN_libc
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+
+	.text
+ENTRY(__strnlen)
+	.type	__strnlen, @gnu_indirect_function
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__strnlen_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__strnlen_sse2@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4);
+	cfi_restore (ebx)
+	ret
+END(__strnlen)
+
+weak_alias(__strnlen, strnlen)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c
new file mode 100644
index 0000000..49f32a2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen-c.c
@@ -0,0 +1,5 @@
+#ifndef NOT_IN_libc
+# define WCSLEN  __wcslen_ia32
+#endif
+
+#include "wcsmbs/wcslen.c"
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000..d41d623
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -0,0 +1,194 @@
+/* wcslen with SSE2
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+# include <sysdep.h>
+# define STR	4
+
+	.text
+ENTRY (__wcslen_sse2)
+	mov	STR(%esp), %edx
+
+	cmp	$0, (%edx)
+	jz	L(exit_tail0)
+	cmp	$0, 4(%edx)
+	jz	L(exit_tail1)
+	cmp	$0, 8(%edx)
+	jz	L(exit_tail2)
+	cmp	$0, 12(%edx)
+	jz	L(exit_tail3)
+	cmp	$0, 16(%edx)
+	jz	L(exit_tail4)
+	cmp	$0, 20(%edx)
+	jz	L(exit_tail5)
+	cmp	$0, 24(%edx)
+	jz	L(exit_tail6)
+	cmp	$0, 28(%edx)
+	jz	L(exit_tail7)
+
+	pxor	%xmm0, %xmm0
+
+	lea	32(%edx), %eax
+	lea	16(%edx), %ecx
+	and	$-16, %eax
+
+	pcmpeqd	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	and	$-0x40, %eax
+
+	.p2align 4
+L(aligned_64_loop):
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+
+	pminub	%xmm1, %xmm0
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2
+	pcmpeqd	%xmm3, %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	64(%eax), %eax
+	jz	L(aligned_64_loop)
+
+	pcmpeqd	-64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	48(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	-32(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm6, %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	jmp	L(aligned_64_loop)
+
+	.p2align 4
+L(exit):
+	sub	%ecx, %eax
+	shr	$2, %eax
+	test	%dl, %dl
+	jz	L(exit_high)
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_1)
+	ret
+
+	.p2align 4
+L(exit_high):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_3)
+	add	$2, %eax
+	ret
+
+	.p2align 4
+L(exit_1):
+	add	$1, %eax
+	ret
+
+	.p2align 4
+L(exit_3):
+	add	$3, %eax
+	ret
+
+	.p2align 4
+L(exit_tail0):
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(exit_tail1):
+	mov	$1, %eax
+	ret
+
+	.p2align 4
+L(exit_tail2):
+	mov	$2, %eax
+	ret
+
+	.p2align 4
+L(exit_tail3):
+	mov	$3, %eax
+	ret
+
+	.p2align 4
+L(exit_tail4):
+	mov	$4, %eax
+	ret
+
+	.p2align 4
+L(exit_tail5):
+	mov	$5, %eax
+	ret
+
+	.p2align 4
+L(exit_tail6):
+	mov	$6, %eax
+	ret
+
+	.p2align 4
+L(exit_tail7):
+	mov	$7, %eax
+	ret
+
+END (__wcslen_sse2)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
new file mode 100644
index 0000000..5867037
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen.S
@@ -0,0 +1,56 @@
+/* Multiple versions of wcslen
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef  NOT_IN_libc
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+
+	.text
+ENTRY(__wcslen)
+	.type	__wcslen, @gnu_indirect_function
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__wcslen_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__wcslen_sse2@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4);
+	cfi_restore (ebx)
+	ret
+END(__wcslen)
+
+weak_alias(__wcslen, wcslen)
+#endif
diff --git a/wcsmbs/wcslen.c b/wcsmbs/wcslen.c
index 1bced4b..4d7972b 100644
--- a/wcsmbs/wcslen.c
+++ b/wcsmbs/wcslen.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1998, 2011  Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
 
@@ -19,10 +19,13 @@
 
 #include <wchar.h>
 
-
 /* Return length of string S.  */
+#ifndef WCSLEN
+# define WCSLEN __wcslen
+#endif
+
 size_t
-__wcslen (s)
+WCSLEN (s)
      const wchar_t *s;
 {
   size_t len = 0;
@@ -40,4 +43,6 @@ __wcslen (s)
 
   return len;
 }
+#ifndef WCSLEN
 weak_alias (__wcslen, wcslen)
+#endif

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                  |   15 +
 NEWS                                       |    4 +-
 string/strnlen.c                           |   11 +-
 sysdeps/i386/i686/multiarch/Makefile       |    1 +
 sysdeps/i386/i686/multiarch/strlen-sse2.S  |  440 +++++++++++++++++++++++++---
 sysdeps/i386/i686/multiarch/strnlen-c.c    |    8 +
 sysdeps/i386/i686/multiarch/strnlen-sse2.S |    3 +
 sysdeps/i386/i686/multiarch/strnlen.S      |   56 ++++
 sysdeps/i386/i686/multiarch/wcslen-c.c     |    5 +
 sysdeps/i386/i686/multiarch/wcslen-sse2.S  |  194 ++++++++++++
 sysdeps/i386/i686/multiarch/wcslen.S       |   56 ++++
 wcsmbs/wcslen.c                            |   11 +-
 12 files changed, 757 insertions(+), 47 deletions(-)
 create mode 100644 sysdeps/i386/i686/multiarch/strnlen-c.c
 create mode 100644 sysdeps/i386/i686/multiarch/strnlen-sse2.S
 create mode 100644 sysdeps/i386/i686/multiarch/strnlen.S
 create mode 100644 sysdeps/i386/i686/multiarch/wcslen-c.c
 create mode 100644 sysdeps/i386/i686/multiarch/wcslen-sse2.S
 create mode 100644 sysdeps/i386/i686/multiarch/wcslen.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]