This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch neleai/strlen created. glibc-2.17-172-g3d4bf13


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, neleai/strlen, has been created
        at  3d4bf1319ff1c00654b41ff54a317c0d8ca616c9 (commit)

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3d4bf1319ff1c00654b41ff54a317c0d8ca616c9

commit 3d4bf1319ff1c00654b41ff54a317c0d8ca616c9
Author: Ondrej Bilka <neleai@seznam.cz>
Date:   Tue Jan 29 14:53:31 2013 +0100

    Faster strlen for x64

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index dd6c27d..41cc882 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -10,14 +10,14 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
-		   strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
+		   strncase_l-ssse3 memset-x86-64 \
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
 		   strcpy-sse2-unaligned strncpy-sse2-unaligned \
 		   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
-		   strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
-		   strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
-		   memcmp-ssse3
+		   strcat-ssse3 strncat-ssse3  \
+		   strrchr-sse2-no-bsf strchr-sse2-no-bsf \
+		   memcmp-ssse3 
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
@@ -30,6 +30,7 @@ CFLAGS-strcasestr-nonascii.c += -msse4
 endif
 endif
 
+
 ifeq ($(subdir),wcsmbs)
 sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
 endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 643cb2d..ffa0f63 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -188,9 +188,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
 
   /* Support sysdeps/x86_64/multiarch/strnlen.S.  */
-  IFUNC_IMPL (i, name, strnlen,
-	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2_no_bsf)
-	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
 
   /* Support sysdeps/x86_64/multiarch/strpbrk.S.  */
   IFUNC_IMPL (i, name, strpbrk,
@@ -264,9 +261,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strlen.S.  */
   IFUNC_IMPL (i, name, strlen,
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_SSE4_2, __strlen_sse42)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_pminub)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_no_bsf)
 	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)
 	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
 
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
deleted file mode 100644
index ab29cef..0000000
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Multiple versions of strlen(str) -- determine the length of the string STR.
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc and for
-   the DSO.  In static binaries we need strlen before the initialization
-   happened.  */
-#if defined SHARED && !defined NOT_IN_libc
-	.text
-ENTRY(strlen)
-	.type	strlen, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__strlen_sse2_pminub(%rip), %rax
-	testl	$bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
-	jnz	2f
-	leaq	__strlen_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jz	2f
-	leaq	__strlen_sse42(%rip), %rax
-	ret
-2:	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
-	jz	3f
-	leaq    __strlen_sse2_no_bsf(%rip), %rax
-3:	ret
-END(strlen)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strlen_sse2, @function; \
-	.align 16; \
-	.globl __strlen_sse2; \
-	.hidden __strlen_sse2; \
-	__strlen_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strlen_sse2, .-__strlen_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strlen calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strlen; __GI_strlen = __strlen_sse2
-#endif
-
-#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
deleted file mode 100644
index 248328d..0000000
--- a/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNLEN
-#define STRLEN __strnlen_sse2_no_bsf
-#include "strlen-sse2-no-bsf.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen.S b/sysdeps/x86_64/multiarch/strnlen.S
deleted file mode 100644
index 124f845..0000000
--- a/sysdeps/x86_64/multiarch/strnlen.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* multiple version of strnlen
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc.  */
-#ifndef NOT_IN_libc
-
-	.text
-ENTRY(__strnlen)
-	.type	__strnlen, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__strnlen_sse2(%rip), %rax
-	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
-	jz	2f
-	leaq	__strnlen_sse2_no_bsf(%rip), %rax
-2:	ret
-END(__strnlen)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strnlen_sse2, @function; \
-	.align 16; \
-	.globl __strnlen_sse2; \
-	.hidden __strnlen_sse2; \
-	__strnlen_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strnlen_sse2, .-__strnlen_sse2
-
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI_strnlen; __GI_strnlen = __strnlen_sse2
-#endif
-
-#include "../strnlen.S"
diff --git a/sysdeps/x86_64/rtld-strlen.S b/sysdeps/x86_64/rtld-strlen.S
index bb23b55..71be00b 100644
--- a/sysdeps/x86_64/rtld-strlen.S
+++ b/sysdeps/x86_64/rtld-strlen.S
@@ -1,138 +1 @@
-/* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2002-2013 Free Software Foundation, Inc.
-   Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-
-	.text
-ENTRY (strlen)
-	movq %rdi, %rcx		/* Duplicate source pointer. */
-	andl $7, %ecx		/* mask alignment bits */
-	movq %rdi, %rax		/* duplicate destination.  */
-	jz 1f			/* aligned => start loop */
-
-	neg %ecx		/* We need to align to 8 bytes.  */
-	addl $8,%ecx
-	/* Search the first bytes directly.  */
-0:	cmpb $0x0,(%rax)	/* is byte NUL? */
-	je 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-	decl %ecx
-	jnz 0b
-
-1:	movq $0xfefefefefefefeff,%r8 /* Save magic.  */
-
-	.p2align 4		/* Align loop.  */
-4:	/* Main Loop is unrolled 4 times.  */
-	/* First unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Second unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Third unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Fourth unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz 4b			/* no NUL found => continue loop */
-
-	.p2align 4		/* Align, it's a jump target.  */
-3:	subq $8,%rax		/* correct pointer increment.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0x00ff0000, %ecx /* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	testl $0xff000000, %ecx /* is fourth byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	shrq $32, %rcx		/* look at other half.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0xff0000, %ecx	/* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-2:
-	subq %rdi, %rax		/* compute difference to string start */
-	ret
-END (strlen)
-libc_hidden_builtin_def (strlen)
+#include "sysdeps/x86_64/strlen.S"
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 4bdca0a..33dd371 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,7 +1,4 @@
-/* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2009-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -17,85 +14,187 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
+#include <sysdep.h> 
 
+/* Also built into the dynamic linker (via rtld-strlen.S) - use only %xmm8-%xmm15. */
 
-	.text
+/* Long-lived registers are
+  strlen(s), strnlen(s,n):
+
+  %xmm11 - zero
+  %rdi   - s
+  %r10   - (s+n)&(~(64-1))  (strnlen only)
+  %r11   - s+n              (strnlen only)
+ */
+
+
+.text
 ENTRY(strlen)
-	xor	%rax, %rax
-	mov	%edi, %ecx
-	and	$0x3f, %ecx
-	pxor	%xmm0, %xmm0
-	cmp	$0x30, %ecx
-	ja	L(next)
-	movdqu	(%rdi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	mov	%rdi, %rax
-	and	$-16, %rax
-	jmp	L(align16_start)
-L(next):
-	mov	%rdi, %rax
-	and	$-16, %rax
-	pcmpeqb	(%rax), %xmm0
-	mov	$-1, %esi
-	sub	%rax, %rcx
-	shl	%cl, %esi
-	pmovmskb %xmm0, %edx
-	and	%esi, %edx
-	jnz	L(exit)
-L(align16_start):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):
-	pcmpeqb	16(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%rax), %rax
-	test	%edx, %edx
-	jz	L(align16_loop)
-L(exit):
-	sub	%rdi, %rax
-L(exit_less16):
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	ret
-	.p2align 4
-L(exit16):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	16(%rdx,%rax), %rax
-	ret
-	.p2align 4
-L(exit32):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	32(%rdx,%rax), %rax
-	ret
-	.p2align 4
-L(exit48):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	48(%rdx,%rax), %rax
-	ret
+
+#define FIND_ZERO           \
+  pcmpeqb   (%rax), %xmm8;  \
+  pcmpeqb 16(%rax), %xmm9;  \
+  pcmpeqb 32(%rax), %xmm10; \
+  pcmpeqb 48(%rax), %xmm11; \
+  pmovmskb  %xmm8,  %esi;   \
+  pmovmskb  %xmm9,  %edx;   \
+  pmovmskb  %xmm10, %r8d;   \
+  pmovmskb  %xmm11, %ecx;   \
+  salq  $16, %rdx;          \
+  salq  $16, %rcx;          \
+  orq  %rsi, %rdx;          \
+  orq  %r8, %rcx;           \
+  salq  $32, %rcx;          \
+  orq  %rcx, %rdx;
+
+#ifdef AS_STRNLEN
+/* Do not read anything when n==0. */
+  test %rsi,%rsi
+  jne L(n_nonzero)
+  xor %rax,%rax
+  ret
+L(n_nonzero):
+
+/* Initialize long-lived registers. */
+
+  add  %rdi,%rsi
+  mov  %rsi,%r10
+  and  $-64,%r10 
+  mov  %rsi,%r11
+#endif
+
+
+  pxor %xmm8,%xmm8
+  pxor %xmm9,%xmm9
+  pxor %xmm10,%xmm10
+  pxor %xmm11,%xmm11
+  movq  %rdi, %rax
+  movq  %rdi, %rcx
+  andq $4095, %rcx
+  cmpq $4032, %rcx
+  /* We cannot unify this branching as it would be ~6 cycles slower. */
+  ja L(next)
+
+#ifdef AS_STRNLEN
+#define STRNLEN_PROLOG \
+  mov   %r11,%rsi;     \
+  subq  %rax,%rsi;     \
+  andq  $-64,%rax;     \
+  testq $-64,%rsi;     \
+  je L(strnlen_ret) ;
+#else 
+#define STRNLEN_PROLOG  andq $-64,%rax;
+#endif
+
+#define PROLOG(lab) \
+  FIND_ZERO;        \
+  movq %rdi,%rcx;   \
+  xorq %rax,%rcx;   \
+  STRNLEN_PROLOG;   \
+  sarq %cl, %rdx;   \
+  test %rdx,%rdx;   \
+  je L(lab);        \
+  bsfq %rdx,%rax;   \
+  ret
+
+  andq $-16,%rax
+  PROLOG(loop)
+
+  L(next):
+  andq $-64,%rax
+  PROLOG(loop_init)
+
+#ifdef AS_STRNLEN
+  /* We must do this check to correctly handle strnlen(s,-1). */
+  L(strnlen_ret):
+  bts  %rsi ,%rdx
+  sarq %cl  ,%rdx
+  test %rdx ,%rdx
+  je   L(loop_init)
+  bsfq %rdx,%rax
+  ret
+#endif
+
+L(loop_init):
+  pxor %xmm9  ,%xmm9
+  pxor %xmm10 ,%xmm10
+  pxor %xmm11 ,%xmm11
+#ifdef AS_STRNLEN
+L(loop):
+
+  addq $64 ,%rax
+  cmpq %rax,%r10
+  je   L(exit_end)
+
+  movdqa    (%rax), %xmm8
+  pminub  16(%rax), %xmm8
+  pminub  32(%rax), %xmm8
+  pminub  48(%rax), %xmm8
+  pcmpeqb   %xmm11, %xmm8
+  pmovmskb   %xmm8, %edx
+  testl %edx, %edx
+  jne L(exit)
+  jmp L(loop)
+
+  L(exit_end):
+  cmp %rax  , %r11
+  je L(first)
+  pxor %xmm8, %xmm8
+  FIND_ZERO
+  L(first):
+  bts   %r11, %rdx
+
+  bsfq  %rdx, %rdx       
+  addq  %rdx, %rax       
+  subq  %rdi, %rax       
+  ret  
+ 
+  L(exit):
+  pxor %xmm8, %xmm8
+  FIND_ZERO
+
+  bsfq  %rdx, %rdx       
+  addq  %rdx, %rax       
+  subq  %rdi, %rax       
+  ret
+
+#else
+L(loop):
+
+  movdqa  64(%rax), %xmm8
+  pminub  80(%rax), %xmm8
+  pminub  96(%rax), %xmm8
+  pminub 112(%rax), %xmm8
+  pcmpeqb   %xmm11, %xmm8
+  pmovmskb   %xmm8, %edx
+  testl  %edx, %edx
+  jne  L(exit64)
+
+  subq  $-128, %rax
+
+  movdqa    (%rax), %xmm8
+  pminub  16(%rax), %xmm8
+  pminub  32(%rax), %xmm8
+  pminub  48(%rax), %xmm8
+  pcmpeqb   %xmm11, %xmm8
+  pmovmskb   %xmm8, %edx
+  testl  %edx, %edx
+  jne  L(exit0)
+  jmp  L(loop)
+
+  L(exit64):
+  addq  $64, %rax
+  L(exit0):
+  pxor %xmm8,%xmm8
+  FIND_ZERO
+
+  bsfq  %rdx, %rdx       
+  addq  %rdx, %rax       
+  subq  %rdi, %rax       
+  ret
+  
+#endif  
+
 END(strlen)
 libc_hidden_builtin_def (strlen)
+weak_alias(strlen,__strlen_sse2)
diff --git a/sysdeps/x86_64/strnlen.S b/sysdeps/x86_64/strnlen.S
index 6e53503..02af2e0 100644
--- a/sysdeps/x86_64/strnlen.S
+++ b/sysdeps/x86_64/strnlen.S
@@ -1,63 +1,7 @@
-/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
+#define AS_STRNLEN
+#define strlen __strnlen
+#define __strlen_sse2 __strnlen_sse2
+#include "sysdeps/x86_64/strlen.S"
+weak_alias(__strnlen,strnlen);
+weak_alias(__strnlen,__GI_strnlen);
 
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-
-	.text
-ENTRY(__strnlen)
-	movq	%rsi, %rax
-	testq	%rsi, %rsi
-	jz	3f
-	pxor	%xmm2, %xmm2
-	movq	%rdi, %rcx
-	movq	%rdi, %r8
-	movq	$16, %r9
-	andq	$~15, %rdi
-	movdqa	%xmm2, %xmm1
-	pcmpeqb	(%rdi), %xmm2
-	orl	$0xffffffff, %r10d
-	subq	%rdi, %rcx
-	shll	%cl, %r10d
-	subq	%rcx, %r9
-	pmovmskb %xmm2, %edx
-	andl	%r10d, %edx
-	jnz	1f
-	subq	%r9, %rsi
-	jbe	3f
-
-2:	movdqa	16(%rdi), %xmm0
-	leaq	16(%rdi), %rdi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	testl	%edx, %edx
-	jnz	1f
-	subq	$16, %rsi
-	jnbe	2b
-3:	ret
-
-1:	subq	%r8, %rdi
-	bsfl	%edx, %edx
-	addq	%rdi, %rdx
-	cmpq	%rdx, %rax
-	cmovnbq	%rdx, %rax
-	ret
-END(__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_def (strnlen)

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]