This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-131-g107b2fa


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  107b2fa56c15572ae56fb4ed18b50b58aff39b7f (commit)
       via  623aac7f84dfddee9bcf9d51f23612479cf672ec (commit)
      from  b416a900856ff871c06b08fa2c9c943fd86597da (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=107b2fa56c15572ae56fb4ed18b50b58aff39b7f

commit 107b2fa56c15572ae56fb4ed18b50b58aff39b7f
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Thu Aug 26 22:12:16 2010 -0700

    Shorten x86-64 strlen a bit.

diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 7880c1d..ee94377 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,7 +1,6 @@
 /* strlen(str) -- determine the length of the string STR.
    Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
-   Modified by Intel Corporation.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -85,22 +84,19 @@ L(exit_less16):
 L(exit16):
 	sub	%rdi, %rax
 	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$16, %rax
+	lea	16(%rdx), %rax
 	ret
 	.p2align 4
 L(exit32):
 	sub	%rdi, %rax
 	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$32, %rax
+	lea	32(%rdx), %rax
 	ret
 	.p2align 4
 L(exit48):
 	sub	%rdi, %rax
 	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$48, %rax
+	lea	48(%rdx), %rax
 	ret
 END(strlen)
 libc_hidden_builtin_def (strlen)

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=623aac7f84dfddee9bcf9d51f23612479cf672ec

commit 623aac7f84dfddee9bcf9d51f23612479cf672ec
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Thu Aug 26 22:09:34 2010 -0700

    Unroll x86-64 strlen

diff --git a/ChangeLog b/ChangeLog
index 33fe899..6a1bd6a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2010-08-26  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/strlen.S: Unroll the loop.
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	strlen-sse2 strlen-sse2-bsf.
+	* sysdeps/x86_64/multiarch/strlen.S ((strlen): Return
+	__strlen_no_bsf if bit_Slow_BSF is set.
+	(__strlen_sse42): Removed.
+	* sysdeps/x86_64/multiarch/strlen-no-bsf.S: New file.
+	* sysdeps/x86_64/multiarch/strlen-sse4.S: New file.
+
 2010-08-25  Roland McGrath  <roland@redhat.com>
 
 	* sysdeps/x86_64/multiarch/varshift.S: File removed.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index ca0040e..5d2e34e 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,7 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
-		   strncase_l-ssse3
+		   strncase_l-ssse3 strlen-sse4 strlen-no-bsf
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/strlen-no-bsf.S b/sysdeps/x86_64/multiarch/strlen-no-bsf.S
new file mode 100644
index 0000000..29ad822
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-no-bsf.S
@@ -0,0 +1,309 @@
+/* strlen without BSF
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if defined SHARED && !defined NOT_IN_libc
+
+#include <sysdep.h>
+
+	.text
+ENTRY (__strlen_no_bsf)
+	xor	%eax, %eax
+	cmpb	$0, (%rdi)
+	jz	L(exit_tail0)
+	cmpb	$0, 1(%rdi)
+	jz	L(exit_tail1)
+	cmpb	$0, 2(%rdi)
+	jz	L(exit_tail2)
+	cmpb	$0, 3(%rdi)
+	jz	L(exit_tail3)
+	cmpb	$0, 4(%rdi)
+	jz	L(exit_tail4)
+	cmpb	$0, 5(%rdi)
+	jz	L(exit_tail5)
+	cmpb	$0, 6(%rdi)
+	jz	L(exit_tail6)
+	cmpb	$0, 7(%rdi)
+	jz	L(exit_tail7)
+	cmpb	$0, 8(%rdi)
+	jz	L(exit_tail8)
+	cmpb	$0, 9(%rdi)
+	jz	L(exit_tail9)
+	cmpb	$0, 10(%rdi)
+	jz	L(exit_tail10)
+	cmpb	$0, 11(%rdi)
+	jz	L(exit_tail11)
+	cmpb	$0, 12(%rdi)
+	jz	L(exit_tail12)
+	cmpb	$0, 13(%rdi)
+	jz	L(exit_tail13)
+	cmpb	$0, 14(%rdi)
+	jz	L(exit_tail14)
+	cmpb	$0, 15(%rdi)
+	jz	L(exit_tail15)
+	pxor	%xmm0, %xmm0
+	mov	%rdi, %rcx
+	mov	%rdi, %rax
+	and	$-16, %rax
+	add	$16, %rax
+	add	$16, %rcx
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	and	$-0x40, %rax
+	xor	%r8d, %r8d
+L(aligned_64):
+	pcmpeqb	(%rax), %xmm0
+	pcmpeqb	16(%rax), %xmm1
+	pcmpeqb	32(%rax), %xmm2
+	pcmpeqb	48(%rax), %xmm3
+	pmovmskb %xmm0, %edx
+	pmovmskb %xmm1, %esi
+	pmovmskb %xmm2, %edi
+	pmovmskb %xmm3, %r9d
+	or	%edx, %r8d
+	or	%esi, %r8d
+	or	%edi, %r8d
+	or	%r9d, %r8d
+	lea	64(%rax), %rax
+	jz	L(aligned_64)
+
+	test	%edx, %edx
+	jnz	L(aligned_64_exit_16)
+	test	%esi, %esi
+	jnz	L(aligned_64_exit_32)
+	test	%edi, %edi
+	jnz	L(aligned_64_exit_48)
+L(aligned_64_exit_64):
+	mov	%r9d, %edx
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_48):
+	lea	-16(%rax), %rax
+	mov	%edi, %edx
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_32):
+	lea	-32(%rax), %rax
+	mov	%esi, %edx
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_16):
+	lea	-48(%rax), %rax
+L(aligned_64_exit):
+L(exit):
+	sub	%rcx, %rax
+	test	%dl, %dl
+	jz	L(exit_high)
+	test	$0x01, %dl
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dl
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dl
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dl
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dl
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dl
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dl
+	jnz	L(exit_tail6)
+	add	$7, %eax
+L(exit_tail0):
+	ret
+
+L(exit_high):
+	add	$8, %eax
+	test	$0x01, %dh
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dh
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dh
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dh
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dh
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dh
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dh
+	jnz	L(exit_tail6)
+	add	$7, %eax
+	ret
+	.p2align 4
+L(exit_tail1):
+	add	$1, %eax
+	ret
+
+L(exit_tail2):
+	add	$2, %eax
+	ret
+
+L(exit_tail3):
+	add	$3, %eax
+	ret
+
+L(exit_tail4):
+	add	$4, %eax
+	ret
+
+L(exit_tail5):
+	add	$5, %eax
+	ret
+L(exit_tail6):
+	add	$6, %eax
+	ret
+L(exit_tail7):
+	add	$7, %eax
+	ret
+L(exit_tail8):
+	add	$8, %eax
+	ret
+L(exit_tail9):
+	add	$9, %eax
+	ret
+L(exit_tail10):
+	add	$10, %eax
+	ret
+L(exit_tail11):
+	add	$11, %eax
+	ret
+L(exit_tail12):
+	add	$12, %eax
+	ret
+L(exit_tail13):
+	add	$13, %eax
+	ret
+L(exit_tail14):
+	add	$14, %eax
+	ret
+L(exit_tail15):
+	add	$15, %eax
+	ret
+END (__strlen_no_bsf)
+
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-sse4.S b/sysdeps/x86_64/multiarch/strlen-sse4.S
new file mode 100644
index 0000000..6b16ea7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-sse4.S
@@ -0,0 +1,85 @@
+/* strlen with SSE4
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Ulrich Drepper <drepper@redhat.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if defined SHARED && !defined NOT_IN_libc
+
+#include <sysdep.h>
+
+	.section .text.sse4.2,"ax",@progbits
+ENTRY (__strlen_sse42)
+	pxor	%xmm1, %xmm1
+	movl	%edi, %ecx
+	movq	%rdi, %r8
+	andq	$~15, %rdi
+	xor	%edi, %ecx
+	pcmpeqb	(%rdi), %xmm1
+	pmovmskb %xmm1, %edx
+	shrl	%cl, %edx
+	shll	%cl, %edx
+	andl	%edx, %edx
+	jnz	L(less16bytes)
+	pxor	%xmm1, %xmm1
+
+	.p2align 4
+L(more64bytes_loop):
+	pcmpistri $0x08, 16(%rdi), %xmm1
+	jz	L(more32bytes)
+
+	pcmpistri $0x08, 32(%rdi), %xmm1
+	jz	L(more48bytes)
+
+	pcmpistri $0x08, 48(%rdi), %xmm1
+	jz	L(more64bytes)
+
+	add	$64, %rdi
+	pcmpistri $0x08, (%rdi), %xmm1
+	jnz	L(more64bytes_loop)
+	leaq	(%rdi,%rcx), %rax
+	subq	%r8, %rax
+	ret
+
+	.p2align 4
+L(more32bytes):
+	leaq	16(%rdi,%rcx, 1), %rax
+	subq	%r8, %rax
+	ret
+
+	.p2align 4
+L(more48bytes):
+	leaq	32(%rdi,%rcx, 1), %rax
+	subq	%r8, %rax
+	ret
+
+	.p2align 4
+L(more64bytes):
+	leaq	48(%rdi,%rcx, 1), %rax
+	subq	%r8, %rax
+	ret
+
+	.p2align 4
+L(less16bytes):
+	subq	%r8, %rdi
+	bsfl	%edx, %eax
+	addq	%rdi, %rax
+	ret
+
+END (__strlen_sse42)
+
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
index f964113..83a88ec 100644
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ b/sysdeps/x86_64/multiarch/strlen.S
@@ -1,5 +1,5 @@
 /* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
    This file is part of the GNU C Library.
 
@@ -36,74 +36,12 @@ ENTRY(strlen)
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strlen_sse42(%rip), %rax
-2:	ret
-END(strlen)
-
-
-	.section .text.sse4.2,"ax",@progbits
-	.align 	16
-	.type	__strlen_sse42, @function
-__strlen_sse42:
-	cfi_startproc
-	CALL_MCOUNT
-	pxor	%xmm1, %xmm1
-	movl	%edi, %ecx
-	movq	%rdi, %r8
-	andq	$~15, %rdi
-	xor	%edi, %ecx
-	pcmpeqb	(%rdi), %xmm1
-	pmovmskb %xmm1, %edx
-	shrl	%cl, %edx
-	shll	%cl, %edx
-	andl	%edx, %edx
-	jnz	L(less16bytes)
-	pxor	%xmm1, %xmm1
-
-	.p2align 4
-L(more64bytes_loop):
-	pcmpistri $0x08, 16(%rdi), %xmm1
-	jz	L(more32bytes)
-
-	pcmpistri $0x08, 32(%rdi), %xmm1
-	jz	L(more48bytes)
-
-	pcmpistri $0x08, 48(%rdi), %xmm1
-	jz	L(more64bytes)
-
-	add	$64, %rdi
-	pcmpistri $0x08, (%rdi), %xmm1
-	jnz	L(more64bytes_loop)
-	leaq	(%rdi,%rcx), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more32bytes):
-	leaq	16(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more48bytes):
-	leaq	32(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more64bytes):
-	leaq	48(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
 	ret
-
-	.p2align 4
-L(less16bytes):
-	subq	%r8, %rdi
-	bsfl	%edx, %eax
-	addq	%rdi, %rax
-	ret
-	cfi_endproc
-	.size	__strlen_sse42, .-__strlen_sse42
-
+2:	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+	jz	3f
+	leaq    __strlen_no_bsf(%rip), %rax
+3:	ret
+END(strlen)
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 93aee6b..7880c1d 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,6 +1,7 @@
 /* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
+   Modified by Intel Corporation.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,29 +24,83 @@
 
 	.text
 ENTRY(strlen)
-	pxor	%xmm2, %xmm2
-	movq	%rdi, %rcx
-	movq	%rdi, %r8
-	andq	$~15, %rdi
-	movdqa	%xmm2, %xmm1
-	pcmpeqb	(%rdi), %xmm2
-	orl	$0xffffffff, %esi
-	subq	%rdi, %rcx
-	shll	%cl, %esi
-	pmovmskb %xmm2, %edx
-	andl	%esi, %edx
-	jnz	1f
-
-2:	movdqa	16(%rdi), %xmm0
-	leaq	16(%rdi), %rdi
+	xor	%rax, %rax
+	mov	%edi, %ecx
+	and	$0x3f, %ecx
+	pxor	%xmm0, %xmm0
+	cmp	$0x30, %ecx
+	ja	L(next)
+	movdqu	(%rdi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %edx
-	testl	%edx, %edx
-	jz	2b
+	test	%edx, %edx
+	jnz	L(exit_less16)
+	mov	%rdi, %rax
+	and	$-16, %rax
+	jmp	L(align16_start)
+L(next):
+	mov	%rdi, %rax
+	and	$-16, %rax
+	pcmpeqb	(%rax), %xmm0
+	mov	$-1, %esi
+	sub	%rax, %rcx
+	shl	%cl, %esi
+	pmovmskb %xmm0, %edx
+	and	%esi, %edx
+	jnz	L(exit)
+L(align16_start):
+	pxor	%xmm0, %xmm0
+	pxor	%xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	pxor	%xmm3, %xmm3
+	.p2align 4
+L(align16_loop):
+	pcmpeqb	16(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
 
-1:	subq	%r8, %rdi
-	bsfl	%edx, %eax
-	addq	%rdi, %rax
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	64(%rax), %rax
+	test	%edx, %edx
+	jz	L(align16_loop)
+L(exit):
+	sub	%rdi, %rax
+L(exit_less16):
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	ret
+	.p2align 4
+L(exit16):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$16, %rax
+	ret
+	.p2align 4
+L(exit32):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$32, %rax
+	ret
+	.p2align 4
+L(exit48):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$48, %rax
 	ret
 END(strlen)
 libc_hidden_builtin_def (strlen)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   11 +
 sysdeps/x86_64/multiarch/Makefile                  |    2 +-
 .../multiarch/strlen-no-bsf.S}                     |  220 ++++++++------------
 sysdeps/x86_64/multiarch/strlen-sse4.S             |   85 ++++++++
 sysdeps/x86_64/multiarch/strlen.S                  |   74 +------
 sysdeps/x86_64/strlen.S                            |   93 +++++++--
 6 files changed, 266 insertions(+), 219 deletions(-)
 copy sysdeps/{i386/i686/multiarch/strlen-sse2.S => x86_64/multiarch/strlen-no-bsf.S} (63%)
 create mode 100644 sysdeps/x86_64/multiarch/strlen-sse4.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]