This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.14-429-gfc2ee42
- From: drepper at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 23 Oct 2011 19:18:32 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.14-429-gfc2ee42
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab (commit)
from 09229f3e1b617d9dcfa3227f32bb72436d7fcac4 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab
commit fc2ee42abe595bbf6b8bbf0637648ad8b5d4faab
Author: Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
Date: Sun Oct 23 15:17:23 2011 -0400
Add optimized wcslen and strnlen for x86-32
diff --git a/ChangeLog b/ChangeLog
index 542869b..c538e40 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2011-10-23 Ulrich Drepper <drepper@gmail.com>
+
+ * string/strnlen.c: Define and use STRNLEN macro.
+ * sysdeps/i386/i686/multiarch/Makefile [string] (sysdep_routines):
+ Add strnlen-sse2, strnlen-c, wcslen-sse2, and wcslen-c.
+ * sysdeps/i386/i686/multiarch/strlen-sse2.S: Add support for strnlen.
+ * wcsmbs/wcslen.c: Define and use WCSLEN.
+ * sysdeps/i386/i686/multiarch/strnlen-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strnlen-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/strnlen.S: New file.
+ * sysdeps/i386/i686/multiarch/wcslen-c.c: New file.
+ * sysdeps/i386/i686/multiarch/wcslen-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/wcslen.S: New file.
+ Patch by Liubov Dmitrieva <liubov.dmitrieva@gmail.com>.
+
2011-10-20 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
diff --git a/NEWS b/NEWS
index ad6ddc7..e0bb0ef 100644
--- a/NEWS
+++ b/NEWS
@@ -26,8 +26,8 @@ Version 2.15
* Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
Contributed by HJ Lu.
-* Optimized strcat, strncat, wcslen, strnlen on x86-64 and optimized
- wcscmp on x86-32 and x86-64.
+* Optimized strcat, strncat on x86-64 and optimized wcscmp, wcslen, strnlen
+ on x86-32 and x86-64.
Contributed by Liubov Dmitrieva.
* Optimized strchr and strrchr for SSE on x86-32.
diff --git a/string/strnlen.c b/string/strnlen.c
index 454257b..3f52c49 100644
--- a/string/strnlen.c
+++ b/string/strnlen.c
@@ -1,5 +1,5 @@
/* Find the length of STRING, but scan at most MAXLEN characters.
- Copyright (C) 1991,1993,1997,2000,2001,2005 Free Software Foundation, Inc.
+ Copyright (C) 1991, 1993, 1997, 2000, 2001, 2005, 2011 Free Software Foundation, Inc.
Contributed by Jakub Jelinek <jakub@redhat.com>.
Based on strlen written by Torbjorn Granlund (tege@sics.se),
@@ -26,8 +26,13 @@
/* Find the length of S, but scan at most MAXLEN characters. If no
'\0' terminator is found in that many characters, return MAXLEN. */
+
+#ifndef STRNLEN
+# define STRNLEN __strnlen
+#endif
+
size_t
-__strnlen (const char *str, size_t maxlen)
+STRNLEN (const char *str, size_t maxlen)
{
const char *char_ptr, *end_ptr = str + maxlen;
const unsigned long int *longword_ptr;
@@ -157,5 +162,7 @@ __strnlen (const char *str, size_t maxlen)
char_ptr = end_ptr;
return char_ptr - str;
}
+#ifndef STRNLEN
weak_alias (__strnlen, strnlen)
+#endif
libc_hidden_def (strnlen)
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 98d1ad6..5f18538 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -18,6 +18,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
memrchr-sse2 memrchr-sse2-bsf memrchr-c \
rawmemchr-sse2 rawmemchr-sse2-bsf \
+ strnlen-sse2 strnlen-c wcslen-sse2 wcslen-c \
wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
index 2dbc4a9..91b6d79 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -18,29 +18,46 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+/* For strlen only the SHARED version is optimized; for strcat, strncat and strnlen both the STATIC and SHARED versions are optimized.  */
+
+#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+
# ifndef USE_AS_STRCAT
# include <sysdep.h>
-# define CFI_PUSH(REG) \
+# define PARMS 4
+# define STR PARMS
+# define RETURN ret
+
+# ifdef USE_AS_STRNLEN
+# define LEN PARMS + 8
+# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
-# define CFI_POP(REG) \
+# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-# define PARMS 4
-# define STR PARMS
-# define ENTRANCE
-# define RETURN ret
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+# undef RETURN
+# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# endif
+
+# ifndef STRLEN
+# define STRLEN __strlen_sse2
+# endif
atom_text_section
-ENTRY (__strlen_sse2)
- ENTRANCE
+ENTRY (STRLEN)
mov STR(%esp), %edx
+# ifdef USE_AS_STRNLEN
+ PUSH (%edi)
+ movl LEN(%esp), %edi
+ sub $4, %edi
+ jbe L(len_less4_prolog)
+# endif
# endif
xor %eax, %eax
cmpb $0, (%edx)
@@ -51,6 +68,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail2)
cmpb $0, 3(%edx)
jz L(exit_tail3)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less8_prolog)
+# endif
+
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmpb $0, 5(%edx)
@@ -59,6 +82,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail6)
cmpb $0, 7(%edx)
jz L(exit_tail7)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less12_prolog)
+# endif
+
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmpb $0, 9(%edx)
@@ -67,6 +96,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail10)
cmpb $0, 11(%edx)
jz L(exit_tail11)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less16_prolog)
+# endif
+
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmpb $0, 13(%edx)
@@ -75,11 +110,18 @@ ENTRY (__strlen_sse2)
jz L(exit_tail14)
cmpb $0, 15(%edx)
jz L(exit_tail15)
+
pxor %xmm0, %xmm0
- mov %edx, %eax
- lea 16(%edx), %ecx
+ lea 16(%edx), %eax
+ mov %eax, %ecx
and $-16, %eax
- add $16, %eax
+
+# ifdef USE_AS_STRNLEN
+ and $15, %edx
+ add %edx, %edi
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
@@ -95,7 +137,6 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax
jnz L(exit)
-
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -109,6 +150,11 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
@@ -133,6 +179,11 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
@@ -157,6 +208,11 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
@@ -181,8 +237,20 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef USE_AS_STRNLEN
+ mov %eax, %edx
+ and $63, %edx
+ add %edx, %edi
+# endif
+
and $-0x40, %eax
-L(aligned_64):
+
+ .p2align 4
+L(aligned_64_loop):
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -194,7 +262,7 @@ L(aligned_64):
pmovmskb %xmm2, %edx
test %edx, %edx
lea 64(%eax), %eax
- jz L(aligned_64)
+ jz L(aligned_64_loop)
pcmpeqb -64(%eax), %xmm3
pmovmskb %xmm3, %edx
@@ -221,56 +289,348 @@ L(exit):
sub %ecx, %eax
test %dl, %dl
jz L(exit_high)
+
+ mov %dl, %cl
+ and $15, %cl
+ jz L(exit_8)
test $0x01, %dl
jnz L(exit_tail0)
-
test $0x02, %dl
jnz L(exit_tail1)
-
test $0x04, %dl
jnz L(exit_tail2)
+ add $3, %eax
+ RETURN
- test $0x08, %dl
- jnz L(exit_tail3)
-
+ .p2align 4
+L(exit_8):
test $0x10, %dl
jnz L(exit_tail4)
-
test $0x20, %dl
jnz L(exit_tail5)
-
test $0x40, %dl
jnz L(exit_tail6)
add $7, %eax
-L(exit_tail0):
RETURN
+ .p2align 4
L(exit_high):
- add $8, %eax
+ mov %dh, %ch
+ and $15, %ch
+ jz L(exit_high_8)
test $0x01, %dh
+ jnz L(exit_tail8)
+ test $0x02, %dh
+ jnz L(exit_tail9)
+ test $0x04, %dh
+ jnz L(exit_tail10)
+ add $11, %eax
+ RETURN
+
+ .p2align 4
+L(exit_high_8):
+ test $0x10, %dh
+ jnz L(exit_tail12)
+ test $0x20, %dh
+ jnz L(exit_tail13)
+ test $0x40, %dh
+ jnz L(exit_tail14)
+ add $15, %eax
+L(exit_tail0):
+ RETURN
+
+# ifdef USE_AS_STRNLEN
+
+ .p2align 4
+L(len_less64):
+ pxor %xmm0, %xmm0
+ add $64, %edi
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ pxor %xmm1, %xmm1
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ movl LEN(%esp), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit):
+ sub %ecx, %eax
+
+ test %dl, %dl
+ jz L(strnlen_exit_high)
+ mov %dl, %cl
+ and $15, %cl
+ jz L(strnlen_exit_8)
+ test $0x01, %dl
jnz L(exit_tail0)
+ test $0x02, %dl
+ jnz L(strnlen_exit_tail1)
+ test $0x04, %dl
+ jnz L(strnlen_exit_tail2)
+ sub $4, %edi
+ jb L(return_start_len)
+ lea 3(%eax), %eax
+ RETURN
- test $0x02, %dh
- jnz L(exit_tail1)
+ .p2align 4
+L(strnlen_exit_8):
+ test $0x10, %dl
+ jnz L(strnlen_exit_tail4)
+ test $0x20, %dl
+ jnz L(strnlen_exit_tail5)
+ test $0x40, %dl
+ jnz L(strnlen_exit_tail6)
+ sub $8, %edi
+ jb L(return_start_len)
+ lea 7(%eax), %eax
+ RETURN
+ .p2align 4
+L(strnlen_exit_high):
+ mov %dh, %ch
+ and $15, %ch
+ jz L(strnlen_exit_high_8)
+ test $0x01, %dh
+ jnz L(strnlen_exit_tail8)
+ test $0x02, %dh
+ jnz L(strnlen_exit_tail9)
test $0x04, %dh
- jnz L(exit_tail2)
-
- test $0x08, %dh
- jnz L(exit_tail3)
+ jnz L(strnlen_exit_tail10)
+ sub $12, %edi
+ jb L(return_start_len)
+ lea 11(%eax), %eax
+ RETURN
+ .p2align 4
+L(strnlen_exit_high_8):
test $0x10, %dh
- jnz L(exit_tail4)
-
+ jnz L(strnlen_exit_tail12)
test $0x20, %dh
- jnz L(exit_tail5)
-
+ jnz L(strnlen_exit_tail13)
test $0x40, %dh
- jnz L(exit_tail6)
- add $7, %eax
+ jnz L(strnlen_exit_tail14)
+ sub $16, %edi
+ jb L(return_start_len)
+ lea 15(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail1):
+ sub $2, %edi
+ jb L(return_start_len)
+ lea 1(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail2):
+ sub $3, %edi
+ jb L(return_start_len)
+ lea 2(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail4):
+ sub $5, %edi
+ jb L(return_start_len)
+ lea 4(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail5):
+ sub $6, %edi
+ jb L(return_start_len)
+ lea 5(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail6):
+ sub $7, %edi
+ jb L(return_start_len)
+ lea 6(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail8):
+ sub $9, %edi
+ jb L(return_start_len)
+ lea 8(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail9):
+ sub $10, %edi
+ jb L(return_start_len)
+ lea 9(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail10):
+ sub $11, %edi
+ jb L(return_start_len)
+ lea 10(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail12):
+ sub $13, %edi
+ jb L(return_start_len)
+ lea 12(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail13):
+ sub $14, %edi
+ jb L(return_start_len)
+ lea 13(%eax), %eax
RETURN
.p2align 4
+L(strnlen_exit_tail14):
+ sub $15, %edi
+ jb L(return_start_len)
+ lea 14(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(return_start_len):
+ movl LEN(%esp), %eax
+ RETURN
+
+/* The labels below are reached only from the length checks in the prolog.  */
+
+ .p2align 4
+L(len_less4_prolog):
+ xor %eax, %eax
+
+ add $4, %edi
+ jz L(exit_tail0)
+
+ cmpb $0, (%edx)
+ jz L(exit_tail0)
+ cmp $1, %edi
+ je L(exit_tail1)
+
+ cmpb $0, 1(%edx)
+ jz L(exit_tail1)
+ cmp $2, %edi
+ je L(exit_tail2)
+
+ cmpb $0, 2(%edx)
+ jz L(exit_tail2)
+ cmp $3, %edi
+ je L(exit_tail3)
+
+ cmpb $0, 3(%edx)
+ jz L(exit_tail3)
+ mov $4, %eax
+ RETURN
+
+ .p2align 4
+L(len_less8_prolog):
+ add $4, %edi
+
+ cmpb $0, 4(%edx)
+ jz L(exit_tail4)
+ cmp $1, %edi
+ je L(exit_tail5)
+
+ cmpb $0, 5(%edx)
+ jz L(exit_tail5)
+ cmp $2, %edi
+ je L(exit_tail6)
+
+ cmpb $0, 6(%edx)
+ jz L(exit_tail6)
+ cmp $3, %edi
+ je L(exit_tail7)
+
+ cmpb $0, 7(%edx)
+ jz L(exit_tail7)
+ mov $8, %eax
+ RETURN
+
+
+ .p2align 4
+L(len_less12_prolog):
+ add $4, %edi
+
+ cmpb $0, 8(%edx)
+ jz L(exit_tail8)
+ cmp $1, %edi
+ je L(exit_tail9)
+
+ cmpb $0, 9(%edx)
+ jz L(exit_tail9)
+ cmp $2, %edi
+ je L(exit_tail10)
+
+ cmpb $0, 10(%edx)
+ jz L(exit_tail10)
+ cmp $3, %edi
+ je L(exit_tail11)
+
+ cmpb $0, 11(%edx)
+ jz L(exit_tail11)
+ mov $12, %eax
+ RETURN
+
+ .p2align 4
+L(len_less16_prolog):
+ add $4, %edi
+
+ cmpb $0, 12(%edx)
+ jz L(exit_tail12)
+ cmp $1, %edi
+ je L(exit_tail13)
+
+ cmpb $0, 13(%edx)
+ jz L(exit_tail13)
+ cmp $2, %edi
+ je L(exit_tail14)
+
+ cmpb $0, 14(%edx)
+ jz L(exit_tail14)
+ cmp $3, %edi
+ je L(exit_tail15)
+
+ cmpb $0, 15(%edx)
+ jz L(exit_tail15)
+ mov $16, %eax
+ RETURN
+# endif
+
+ .p2align 4
L(exit_tail1):
add $1, %eax
RETURN
@@ -330,7 +690,7 @@ L(exit_tail14):
L(exit_tail15):
add $15, %eax
# ifndef USE_AS_STRCAT
- ret
-END (__strlen_sse2)
+ RETURN
+END (STRLEN)
# endif
#endif
diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c
new file mode 100644
index 0000000..567af2c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen-c.c
@@ -0,0 +1,8 @@
+#ifndef NOT_IN_libc
+# define STRNLEN __strnlen_ia32 /* Name under which the included strnlen.c defines its function.  */
+# undef libc_hidden_def /* Drop any earlier definition of the macro that is redefined on the next line.  */
+# define libc_hidden_def(name) \
+ __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32);
+#endif
+
+#include "string/strnlen.c"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
new file mode 100644
index 0000000..56b6ae2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNLEN /* Turns on the USE_AS_STRNLEN length-limit paths in strlen-sse2.S.  */
+#define STRLEN __strnlen_sse2 /* Entry-point name consumed by ENTRY (STRLEN) / END (STRLEN).  */
+#include "strlen-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
new file mode 100644
index 0000000..7e542d9
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strnlen.S
@@ -0,0 +1,56 @@
+/* Multiple versions of strnlen
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef NOT_IN_libc
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx: /* Helper: hands its own return address back in %ebx.  */
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(__strnlen) /* Selector: returns in %eax the address of the strnlen variant to use.  */
+ .type __strnlen, @gnu_indirect_function
+ pushl %ebx /* %ebx is used as a scratch base register and restored before returning.  */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx /* %ebx becomes the base for the @GOTOFF references below.  */
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) /* A zero field means __init_cpu_features is called first.  */
+ jne 1f
+ call __init_cpu_features
+1: leal __strnlen_ia32@GOTOFF(%ebx), %eax /* Default choice: __strnlen_ia32.  */
+ testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ jz 2f /* SSE2 bit clear: keep the default.  */
+ leal __strnlen_sse2@GOTOFF(%ebx), %eax /* SSE2 bit set: pick __strnlen_sse2 instead.  */
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4);
+ cfi_restore (ebx)
+ ret
+END(__strnlen)
+
+weak_alias(__strnlen, strnlen)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c
new file mode 100644
index 0000000..49f32a2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen-c.c
@@ -0,0 +1,5 @@
+#ifndef NOT_IN_libc
+# define WCSLEN __wcslen_ia32 /* Name under which the included wcslen.c defines its function.  */
+#endif
+
+#include "wcsmbs/wcslen.c"
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000..d41d623
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -0,0 +1,194 @@
+/* wcslen with SSE2
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+# include <sysdep.h>
+# define STR 4 /* Offset from %esp of the string pointer on entry.  */
+
+ .text
+ENTRY (__wcslen_sse2) /* Returns in %eax the index of the first zero 32-bit element of the string.  */
+ mov STR(%esp), %edx
+
+ cmpl $0, (%edx) /* Check the first eight elements one at a time; the l suffix pins the compare to 32 bits.  */
+ jz L(exit_tail0)
+ cmpl $0, 4(%edx)
+ jz L(exit_tail1)
+ cmpl $0, 8(%edx)
+ jz L(exit_tail2)
+ cmpl $0, 12(%edx)
+ jz L(exit_tail3)
+ cmpl $0, 16(%edx)
+ jz L(exit_tail4)
+ cmpl $0, 20(%edx)
+ jz L(exit_tail5)
+ cmpl $0, 24(%edx)
+ jz L(exit_tail6)
+ cmpl $0, 28(%edx)
+ jz L(exit_tail7)
+
+ pxor %xmm0, %xmm0
+
+ lea 32(%edx), %eax
+ lea 16(%edx), %ecx /* %ecx = start + 16; L(exit) subtracts it from %eax.  */
+ and $-16, %eax /* %eax = 16-byte aligned address at or below start + 32.  */
+
+ pcmpeqd (%eax), %xmm0 /* Compare four elements against zero at once.  */
+ pmovmskb %xmm0, %edx /* One mask bit per byte, so four bits per element.  */
+ pxor %xmm1, %xmm1
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ pxor %xmm2, %xmm2
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ pxor %xmm3, %xmm3
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ and $-0x40, %eax /* Round %eax down to a 64-byte boundary for the loop below.  */
+
+ .p2align 4
+L(aligned_64_loop):
+ movaps (%eax), %xmm0
+ movaps 16(%eax), %xmm1
+ movaps 32(%eax), %xmm2
+ movaps 48(%eax), %xmm6
+
+ pminub %xmm1, %xmm0
+ pminub %xmm6, %xmm2
+ pminub %xmm0, %xmm2 /* %xmm2 = bytewise minimum of all four 16-byte blocks.  */
+ pcmpeqd %xmm3, %xmm2 /* %xmm3 is all zero here, so this flags zero elements of the minimum.  */
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 64(%eax), %eax
+ jz L(aligned_64_loop)
+
+ pcmpeqd -64(%eax), %xmm3 /* Re-check the first block by itself.  */
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 48(%ecx), %ecx /* Rebias %ecx so L(exit) yields the offset of the first block.  */
+ jnz L(exit)
+
+ pcmpeqd %xmm1, %xmm3 /* Second block is still intact in %xmm1.  */
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx /* Move the bias on by one block.  */
+ jnz L(exit)
+
+ pcmpeqd -32(%eax), %xmm3 /* Third block is reloaded from memory; %xmm2 was overwritten.  */
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqd %xmm6, %xmm3 /* Fourth block is still intact in %xmm6.  */
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ jmp L(aligned_64_loop) /* No single block holds a zero element; %ecx is back at its old value, keep scanning.  */
+
+ .p2align 4
+L(exit): /* %edx = byte mask of the matching block, %eax = address 16 past that block.  */
+ sub %ecx, %eax /* Byte offset of the matching block from the start of the string.  */
+ shr $2, %eax /* Convert the byte offset into an element index.  */
+ test %dl, %dl /* Mask bits 0-7 cover elements 0 and 1 of the block.  */
+ jz L(exit_high)
+
+ mov %dl, %cl
+ and $15, %cl /* Low four bits clear: the zero is element 1, not element 0.  */
+ jz L(exit_1)
+ ret
+
+ .p2align 4
+L(exit_high): /* Mask bits 8-15 cover elements 2 and 3 of the block.  */
+ mov %dh, %ch
+ and $15, %ch
+ jz L(exit_3)
+ add $2, %eax
+ ret
+
+ .p2align 4
+L(exit_1):
+ add $1, %eax
+ ret
+
+ .p2align 4
+L(exit_3):
+ add $3, %eax
+ ret
+
+ .p2align 4
+L(exit_tail0): /* The exit_tailN labels return the constant N.  */
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_tail1):
+ mov $1, %eax
+ ret
+
+ .p2align 4
+L(exit_tail2):
+ mov $2, %eax
+ ret
+
+ .p2align 4
+L(exit_tail3):
+ mov $3, %eax
+ ret
+
+ .p2align 4
+L(exit_tail4):
+ mov $4, %eax
+ ret
+
+ .p2align 4
+L(exit_tail5):
+ mov $5, %eax
+ ret
+
+ .p2align 4
+L(exit_tail6):
+ mov $6, %eax
+ ret
+
+ .p2align 4
+L(exit_tail7):
+ mov $7, %eax
+ ret
+
+END (__wcslen_sse2)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
new file mode 100644
index 0000000..5867037
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/wcslen.S
@@ -0,0 +1,56 @@
+/* Multiple versions of wcslen
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef NOT_IN_libc
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx: /* Helper: hands its own return address back in %ebx.  */
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(__wcslen) /* Selector: returns in %eax the address of the wcslen variant to use.  */
+ .type __wcslen, @gnu_indirect_function
+ pushl %ebx /* %ebx is used as a scratch base register and restored before returning.  */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx /* %ebx becomes the base for the @GOTOFF references below.  */
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) /* A zero field means __init_cpu_features is called first.  */
+ jne 1f
+ call __init_cpu_features
+1: leal __wcslen_ia32@GOTOFF(%ebx), %eax /* Default choice: __wcslen_ia32.  */
+ testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ jz 2f /* SSE2 bit clear: keep the default.  */
+ leal __wcslen_sse2@GOTOFF(%ebx), %eax /* SSE2 bit set: pick __wcslen_sse2 instead.  */
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4);
+ cfi_restore (ebx)
+ ret
+END(__wcslen)
+
+weak_alias(__wcslen, wcslen)
+#endif
diff --git a/wcsmbs/wcslen.c b/wcsmbs/wcslen.c
index 1bced4b..4d7972b 100644
--- a/wcsmbs/wcslen.c
+++ b/wcsmbs/wcslen.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1998, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
@@ -19,10 +19,13 @@
#include <wchar.h>
-
/* Return length of string S. */
+#ifndef WCSLEN
+# define WCSLEN __wcslen
+#endif
+
size_t
-__wcslen (s)
+WCSLEN (s)
const wchar_t *s;
{
size_t len = 0;
@@ -40,4 +43,6 @@ __wcslen (s)
return len;
}
+#ifndef WCSLEN
weak_alias (__wcslen, wcslen)
+#endif
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 15 +
NEWS | 4 +-
string/strnlen.c | 11 +-
sysdeps/i386/i686/multiarch/Makefile | 1 +
sysdeps/i386/i686/multiarch/strlen-sse2.S | 440 +++++++++++++++++++++++++---
sysdeps/i386/i686/multiarch/strnlen-c.c | 8 +
sysdeps/i386/i686/multiarch/strnlen-sse2.S | 3 +
sysdeps/i386/i686/multiarch/strnlen.S | 56 ++++
sysdeps/i386/i686/multiarch/wcslen-c.c | 5 +
sysdeps/i386/i686/multiarch/wcslen-sse2.S | 194 ++++++++++++
sysdeps/i386/i686/multiarch/wcslen.S | 56 ++++
wcsmbs/wcslen.c | 11 +-
12 files changed, 757 insertions(+), 47 deletions(-)
create mode 100644 sysdeps/i386/i686/multiarch/strnlen-c.c
create mode 100644 sysdeps/i386/i686/multiarch/strnlen-sse2.S
create mode 100644 sysdeps/i386/i686/multiarch/strnlen.S
create mode 100644 sysdeps/i386/i686/multiarch/wcslen-c.c
create mode 100644 sysdeps/i386/i686/multiarch/wcslen-sse2.S
create mode 100644 sysdeps/i386/i686/multiarch/wcslen.S
hooks/post-receive
--
GNU C Library master sources