This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-127-ge73015f


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  e73015f2d666b66c69f3c6f350e2dcd3df5b075a (commit)
      from  224c3568f622f367dbbcd3acbcb6cc673ef0a606 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e73015f2d666b66c69f3c6f350e2dcd3df5b075a

commit e73015f2d666b66c69f3c6f350e2dcd3df5b075a
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Wed Aug 25 10:07:37 2010 -0700

    Unroll 32bit SSE strlen and handle slow bsf

diff --git a/ChangeLog b/ChangeLog
index a2853ca..b118a20 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2010-08-25  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+	strlen-sse2 strlen-sse2-bsf.
+	* sysdeps/i386/i686/multiarch/strlen.S (strlen): Return
+	__strlen_sse2_bsf if bit_Slow_BSF is unset.
+	(__strlen_sse2): Removed.
+	* sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S: New file.
+	* sysdeps/i386/i686/multiarch/strlen-sse2.S: New file.
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
+	bit_Slow_BSF for Atom.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_BSF): Define.
+	(index_Slow_BSF): Define.
+	(HAS_SLOW_BSF): Define.
+
 2010-08-25  Ulrich Drepper  <drepper@redhat.com>
 
 	[BZ #10851]
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 26f3e58..271d1e0 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -9,7 +9,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
 		   memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
 		   strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
-		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift
+		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
+		   strlen-sse2 strlen-sse2-bsf
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
new file mode 100644
index 0000000..0dc651f
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
@@ -0,0 +1,127 @@
+/* strlen with SSE2 and BSF
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if defined SHARED && !defined NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)	/* unwind info for one 4-byte push: REG recorded at offset 0 */
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)	/* unwind info for the matching 4-byte pop */
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* push REG and describe it to the unwinder */
+#define POP(REG)	popl REG; CFI_POP (REG)	/* pop REG and describe it to the unwinder */
+#define PARMS		4 + 8	/* Preserve ESI and EDI.  */
+#define	STR		PARMS	/* %esp offset of the string argument once ENTRANCE has run */
+#define ENTRANCE	PUSH (%esi); PUSH (%edi); cfi_remember_state	/* save both registers, snapshot the CFI state */
+#define RETURN		POP (%edi); POP (%esi); ret; \
+			cfi_restore_state; cfi_remember_state	/* code after the ret gets the snapshot back */
+
+	.text
+ENTRY ( __strlen_sse2_bsf)	/* string pointer at STR(%esp); the length is built in %eax */
+	ENTRANCE	/* pushes %esi and %edi */
+	mov	STR(%esp), %edi	/* %edi = start of string */
+	xor	%eax, %eax	/* offset 0, used as-is by L(exit_less16) */
+	mov	%edi, %ecx
+	and	$0x3f, %ecx	/* %ecx = offset of the start within its 64-byte block */
+	pxor	%xmm0, %xmm0	/* sixteen zero bytes to compare against */
+	cmp	$0x30, %ecx
+	ja	L(next)	/* offset > 48: 16 bytes from here would leave the 64-byte block */
+	movdqu	(%edi), %xmm1	/* unaligned load of the first 16 bytes */
+	pcmpeqb	%xmm1, %xmm0	/* 0xff in every byte position that holds a NUL */
+	pmovmskb %xmm0, %edx	/* %edx bit i set <=> byte i is NUL */
+	test	%edx, %edx
+	jnz	L(exit_less16)	/* bit index is the length; %eax is still 0 */
+	mov	%edi, %eax
+	and	$-16, %eax	/* %eax = start rounded down to 16 bytes */
+	jmp	L(align16_start)	/* go on with aligned compares from 16(%eax) */
+L(next):	/* start lies past offset 48 of its 64-byte block */
+
+	mov	%edi, %eax
+	and	$-16, %eax	/* %eax = aligned 16-byte chunk containing the start */
+	pcmpeqb	(%eax), %xmm0	/* compare the whole aligned chunk */
+	mov	$-1, %esi
+	sub	%eax, %ecx	/* low 5 bits of %ecx = %edi & 15, all that shl looks at */
+	shl	%cl, %esi	/* clear the mask bits for bytes before the start */
+	pmovmskb %xmm0, %edx
+	and	%esi, %edx	/* drop NULs found ahead of the string */
+	jnz	L(exit)
+L(align16_start):
+	pxor	%xmm0, %xmm0	/* clear the four compare registers */
+	pxor	%xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	pxor	%xmm3, %xmm3
+	.p2align 4
+L(align16_loop):	/* tests 64 bytes per pass, starting at 16(%eax) */
+	pcmpeqb	16(%eax), %xmm0	/* a miss leaves the register zero for the next pass */
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	64(%eax), %eax	/* %eax now addresses the chunk just compared */
+	test	%edx, %edx
+	jz	L(align16_loop)
+L(exit):	/* %eax = address of the chunk holding the NUL, %edx = its mask */
+	sub	%edi, %eax	/* chunk offset from the start (negative when coming from L(next)) */
+L(exit_less16):
+	bsf	%edx, %edx	/* lowest set bit = index of the first NUL in the chunk */
+	add	%edx, %eax	/* %eax = string length */
+	RETURN
+L(exit16):	/* NUL is in the chunk at 16(%eax) */
+	sub	%edi, %eax
+	bsf	%edx, %edx
+	add	%edx, %eax
+	add	$16, %eax	/* account for the chunk's displacement from %eax */
+	RETURN
+L(exit32):	/* NUL is in the chunk at 32(%eax) */
+	sub	%edi, %eax
+	bsf	%edx, %edx
+	add	%edx, %eax
+	add	$32, %eax
+	RETURN
+L(exit48):	/* NUL is in the chunk at 48(%eax) */
+	sub	%edi, %eax
+	bsf	%edx, %edx
+	add	%edx, %eax
+	add	$48, %eax
+	POP (%edi)	/* spelled out instead of RETURN: nothing follows, so presumably */
+	POP (%esi)	/* the cfi_restore_state/cfi_remember_state tail is unwanted here */
+	ret	/* NOTE(review): confirm that is the reason */
+
+END ( __strlen_sse2_bsf)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
new file mode 100644
index 0000000..65809d9
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -0,0 +1,347 @@
+/* strlen with SSE2
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if defined SHARED && !defined NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)	/* unwind info for one 4-byte push: REG recorded at offset 0 */
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)	/* unwind info for the matching 4-byte pop */
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* push REG and describe it to the unwinder */
+#define POP(REG)	popl REG; CFI_POP (REG)	/* pop REG and describe it to the unwinder */
+#define PARMS		4	/* ENTRANCE pushes nothing, so the argument sits at 4(%esp) */
+#define	STR		PARMS	/* %esp offset of the string argument */
+#define ENTRANCE	/* expands to nothing: no registers are saved on entry */
+#define RETURN		ret	/* no registers to restore on the exit paths */
+
+	.text
+ENTRY (__strlen_sse2)	/* string pointer at STR(%esp); the length is built in %eax; no bsf used */
+	ENTRANCE	/* empty in this file */
+	mov	STR(%esp), %edx	/* %edx = start of string */
+	xor	%eax, %eax	/* %eax = 0; each L(exit_tailN) adds N to it */
+	cmpb	$0, (%edx)	/* test the first 16 bytes one at a time */
+	jz	L(exit_tail0)
+	cmpb	$0, 1(%edx)
+	jz	L(exit_tail1)
+	cmpb	$0, 2(%edx)
+	jz	L(exit_tail2)
+	cmpb	$0, 3(%edx)
+	jz	L(exit_tail3)
+	cmpb	$0, 4(%edx)
+	jz	L(exit_tail4)
+	cmpb	$0, 5(%edx)
+	jz	L(exit_tail5)
+	cmpb	$0, 6(%edx)
+	jz	L(exit_tail6)
+	cmpb	$0, 7(%edx)
+	jz	L(exit_tail7)
+	cmpb	$0, 8(%edx)
+	jz	L(exit_tail8)
+	cmpb	$0, 9(%edx)
+	jz	L(exit_tail9)
+	cmpb	$0, 10(%edx)
+	jz	L(exit_tail10)
+	cmpb	$0, 11(%edx)
+	jz	L(exit_tail11)
+	cmpb	$0, 12(%edx)
+	jz	L(exit_tail12)
+	cmpb	$0, 13(%edx)
+	jz	L(exit_tail13)
+	cmpb	$0, 14(%edx)
+	jz	L(exit_tail14)
+	cmpb	$0, 15(%edx)
+	jz	L(exit_tail15)
+	pxor	%xmm0, %xmm0	/* sixteen zero bytes to compare against */
+	mov	%edx, %eax
+	mov	%edx, %ecx
+	and	$-16, %eax
+	add	$16, %ecx	/* %ecx = start + 16; L(exit) subtracts it from %eax */
+	add	$16, %eax	/* %eax = first 16-byte boundary above the start */
+
+	pcmpeqb	(%eax), %xmm0	/* 0xff in every byte position that holds a NUL */
+	pmovmskb %xmm0, %edx	/* %edx bit i set <=> byte i of the chunk is NUL */
+	pxor	%xmm1, %xmm1	/* clear the register for the next compare */
+	test	%edx, %edx
+	lea	16(%eax), %eax	/* lea leaves the flags alone; %eax is now chunk + 16 */
+	jnz	L(exit)	/* the extra 16 in %eax cancels against the 16 added to %ecx */
+
+	pcmpeqb	(%eax), %xmm1	/* same test on each following chunk */
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0	/* %xmm0 is all-zero again: its last compare found no NUL */
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	and	$-0x40, %eax	/* round down to 64 bytes; this only revisits chunks already found NUL-free */
+	PUSH (%esi)	/* %esi, %edi, %ebx receive masks and %ebp their OR in the loop below */
+	PUSH (%edi)
+	PUSH (%ebx)
+	PUSH (%ebp)
+	xor	%ebp, %ebp	/* %ebp = 0: OR of all masks seen so far */
+L(aligned_64):	/* tests four aligned chunks (64 bytes) per pass */
+	pcmpeqb	(%eax), %xmm0
+	pcmpeqb	16(%eax), %xmm1
+	pcmpeqb	32(%eax), %xmm2
+	pcmpeqb	48(%eax), %xmm3
+	pmovmskb %xmm0, %edx	/* one mask per chunk, lowest address in %edx */
+	pmovmskb %xmm1, %esi
+	pmovmskb %xmm2, %edi
+	pmovmskb %xmm3, %ebx
+	or	%edx, %ebp
+	or	%esi, %ebp
+	or	%edi, %ebp
+	or	%ebx, %ebp	/* ZF set only if all four masks are zero */
+	lea	64(%eax), %eax	/* advance without disturbing ZF */
+	jz	L(aligned_64)
+L(48leave):	/* some mask is non-zero; %eax is 64 past the first chunk of this pass */
+	test	%edx, %edx	/* find the lowest chunk with a NUL */
+	jnz	L(aligned_64_exit_16)
+	test	%esi, %esi
+	jnz	L(aligned_64_exit_32)
+	test	%edi, %edi
+	jnz	L(aligned_64_exit_48)
+	mov	%ebx, %edx	/* fourth chunk: %eax already equals chunk + 16 */
+	lea	(%eax), %eax	/* leaves %eax unchanged */
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_48):	/* third chunk */
+	lea	-16(%eax), %eax	/* make %eax = chunk + 16, as L(exit) expects */
+	mov	%edi, %edx
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_32):	/* second chunk */
+	lea	-32(%eax), %eax
+	mov	%esi, %edx
+	jmp	L(aligned_64_exit)
+L(aligned_64_exit_16):	/* first chunk; its mask is already in %edx */
+	lea	-48(%eax), %eax
+L(aligned_64_exit):
+	POP (%ebp)	/* restore in reverse order of the pushes */
+	POP (%ebx)
+	POP (%edi)
+	POP (%esi)
+L(exit):	/* %eax = 16 past the chunk with the NUL, %edx = its mask, %ecx = start + 16 */
+	sub	%ecx, %eax	/* %eax = offset of that chunk from the start */
+	test	%dl, %dl
+	jz	L(exit_high)	/* no NUL in bytes 0-7 of the chunk */
+	test	$0x01, %dl	/* scan mask bits upward for the first NUL */
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dl
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dl
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dl
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dl
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dl
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dl
+	jnz	L(exit_tail6)
+	add	$7, %eax	/* %dl is non-zero with bits 0-6 clear, so bit 7 is set */
+L(exit_tail0):
+	RETURN
+
+L(exit_high):	/* first NUL is in bytes 8-15; examine the mask's second byte */
+	add	$8, %eax
+	test	$0x01, %dh
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dh
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dh
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dh
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dh
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dh
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dh
+	jnz	L(exit_tail6)
+	add	$7, %eax	/* bits 0-6 of %dh are clear, leaving bit 7 */
+	RETURN
+
+	.p2align 4
+L(exit_tail1):	/* L(exit_tailN): add N to %eax and return */
+	add	$1, %eax
+	RETURN
+
+L(exit_tail2):
+	add	$2, %eax
+	RETURN
+
+L(exit_tail3):
+	add	$3, %eax
+	RETURN
+
+L(exit_tail4):
+	add	$4, %eax
+	RETURN
+
+L(exit_tail5):
+	add	$5, %eax
+	RETURN
+
+L(exit_tail6):
+	add	$6, %eax
+	RETURN
+
+L(exit_tail7):	/* tails 7-15 are reached only from the byte-wise prologue */
+	add	$7, %eax
+	RETURN
+
+L(exit_tail8):
+	add	$8, %eax
+	RETURN
+
+L(exit_tail9):
+	add	$9, %eax
+	RETURN
+
+L(exit_tail10):
+	add	$10, %eax
+	RETURN
+
+L(exit_tail11):
+	add	$11, %eax
+	RETURN
+
+L(exit_tail12):
+	add	$12, %eax
+	RETURN
+
+L(exit_tail13):
+	add	$13, %eax
+	RETURN
+
+L(exit_tail14):
+	add	$14, %eax
+	RETURN
+
+L(exit_tail15):
+	add	$15, %eax
+	ret	/* same instruction RETURN expands to in this file */
+
+END (__strlen_sse2)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
index 9786add..9d465c8 100644
--- a/sysdeps/i386/i686/multiarch/strlen.S
+++ b/sysdeps/i386/i686/multiarch/strlen.S
@@ -1,5 +1,5 @@
 /* Multiple versions of strlen
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -48,6 +48,9 @@ ENTRY(strlen)
 1:	leal	__strlen_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
+	leal	__strlen_sse2_bsf@GOTOFF(%ebx), %eax
+	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	jz	2f
 	leal	__strlen_sse2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
 	cfi_adjust_cfa_offset (-4);
@@ -55,84 +58,6 @@ ENTRY(strlen)
 	ret
 END(strlen)
 
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define RETURN		popl %esi; CFI_POP (esi); ret
-
-	.text
-ENTRY (__strlen_sse2)
-/*
- * This implementation uses SSE instructions to compare up to 16 bytes
- * at a time looking for the end of string (null char).
- */
-	pushl	%esi
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (%esi, 0)
-	mov	8(%esp), %eax
-	mov	%eax, %ecx
-	pxor	%xmm0, %xmm0		/* 16 null chars */
-	mov	%eax, %esi
-	and	$15, %ecx
-	jz	1f			/* string is 16 byte aligned */
-
-	/*
-	* Unaligned case. Round down to 16-byte boundary before comparing
-	* 16 bytes for a null char. The code then compensates for any extra chars
-	* preceding the start of the string.
-	*/
-	and	$-16, %esi
-
-	pcmpeqb	(%esi), %xmm0
-	lea	16(%eax), %esi
-	pmovmskb %xmm0, %edx
-
-	shr	%cl, %edx		/* Compensate for bytes preceding the string */
-	test	%edx, %edx
-	jnz	2f
-	sub	%ecx, %esi		/* no null, adjust to next 16-byte boundary */
-	pxor	%xmm0, %xmm0		/* clear xmm0, may have been changed... */
-
-	.p2align 4
-1:					/* 16 byte aligned */
-	pcmpeqb	(%esi), %xmm0		/* look for null bytes */
-	pmovmskb %xmm0, %edx		/* move each byte mask of %xmm0 to edx */
-
-	add	$16, %esi		/* prepare to search next 16 bytes */
-	test	%edx, %edx		/* if no null byte, %edx must be 0 */
-	jnz	2f			/* found a null */
-
-	pcmpeqb	(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %esi
-	test	%edx, %edx
-	jnz	2f
-
-	pcmpeqb	(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %esi
-	test	%edx, %edx
-	jnz	2f
-
-	pcmpeqb	(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %esi
-	test	%edx, %edx
-	jz	1b
-
-2:
-	neg	%eax
-	lea	-16(%eax, %esi), %eax	/* calculate exact offset */
-	bsf	%edx, %ecx		/* Least significant 1 bit is index of null */
-	add	%ecx, %eax
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (%esi)
-	ret
-
-END (__strlen_sse2)
-
 # undef ENTRY
 # define ENTRY(name) \
 	.type __strlen_ia32, @function; \
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 55c9f54..786466d 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -72,6 +72,12 @@ __init_cpu_features (void)
 	  model += extended_model;
 	  switch (model)
 	    {
+	    case 0x1c:
+	    case 0x26:
+	      /* BSF is slow on Atom.  */
+	      __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
+	      break;
+
 	    case 0x1a:
 	    case 0x1e:
 	    case 0x1f:
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 4a211c0..783b020 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -18,6 +18,7 @@
 
 #define bit_Fast_Rep_String	(1 << 0)
 #define bit_Fast_Copy_Backward	(1 << 1)
+#define bit_Slow_BSF		(1 << 2)
 
 #ifdef	__ASSEMBLER__
 
@@ -35,6 +36,7 @@
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -106,6 +108,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
+# define index_Slow_BSF			FEATURE_INDEX_1
 
 #define HAS_ARCH_FEATURE(idx, bit) \
   ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -116,4 +119,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define HAS_FAST_COPY_BACKWARD \
   HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
 
+#define HAS_SLOW_BSF \
+  HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+
 #endif	/* __ASSEMBLER__ */

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                     |   15 +
 sysdeps/i386/i686/multiarch/Makefile          |    3 +-
 sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S |  127 +++++++++
 sysdeps/i386/i686/multiarch/strlen-sse2.S     |  347 +++++++++++++++++++++++++
 sysdeps/i386/i686/multiarch/strlen.S          |   83 +------
 sysdeps/x86_64/multiarch/init-arch.c          |    6 +
 sysdeps/x86_64/multiarch/init-arch.h          |    6 +
 7 files changed, 507 insertions(+), 80 deletions(-)
 create mode 100644 sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
 create mode 100644 sysdeps/i386/i686/multiarch/strlen-sse2.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]