This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-211-gff02d52


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  ff02d5280bf252e86d325ff4348feaf531ede831 (commit)
      from  344d0b545d0a0a0ab737ff333d807969721ce381 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ff02d5280bf252e86d325ff4348feaf531ede831

commit ff02d5280bf252e86d325ff4348feaf531ede831
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Mon Nov 8 03:41:34 2010 -0500

    Use IFUNC on x86-64 memset

diff --git a/ChangeLog b/ChangeLog
index ffa4f68..005266c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2010-11-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/memset.S: Check USE_MULTIARCH and USE_SSE2 for
+	IFUNC support.
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	memset-x86-64.
+	* sysdeps/x86_64/multiarch/bzero.S: New file.
+	* sysdeps/x86_64/multiarch/cacheinfo.c: New file.
+	* sysdeps/x86_64/multiarch/memset-x86-64.S: New file.
+	* sysdeps/x86_64/multiarch/memset.S: New file.
+	* sysdeps/x86_64/multiarch/memset_chk.S: New file.
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+	Set bit_Prefer_SSE_for_memop for Intel processors.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_Prefer_SSE_for_memop):
+	Define.
+	(index_Prefer_SSE_for_memop): Define.
+	(HAS_PREFER_SSE_FOR_MEMOP): Define.
+
 2010-11-04  Luis Machado  <luisgpm@br.ibm.com>
 
 	* sysdeps/powerpc/powerpc32/power7/mempcpy.S: New file.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 681ab87..f6eb71f 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -24,7 +24,7 @@
 #define __STOS_UPPER_BOUNDARY	$65536
 
         .text
-#ifndef NOT_IN_libc
+#if !defined NOT_IN_libc && !defined USE_MULTIARCH
 ENTRY(__bzero)
 	mov	%rsi,%rdx	/* Adjust parameter.  */
 	xorl	%esi,%esi	/* Fill with 0s.  */
@@ -34,10 +34,10 @@ weak_alias (__bzero, bzero)
 #endif
 
 #if defined PIC && !defined NOT_IN_libc
-ENTRY (__memset_chk)
+ENTRY_CHK (__memset_chk)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk)
+END_CHK (__memset_chk)
 #endif
 ENTRY (memset)
 L(memset_entry):
@@ -591,157 +591,13 @@ L(A6Q1):    mov    %dx,-0xe(%rdi)
 L(A7Q0):    mov    %dl,-0x7(%rdi)
 L(A6Q0):    mov    %dx,-0x6(%rdi)
 	mov    %edx,-0x4(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(aligned_now):
-
-	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
-	 jg     L(SSE_pre)
-
-L(8byte_move_try):
-	cmpq	__STOS_LOWER_BOUNDARY,%r8
-	jae	L(8byte_stos_try)
-
-	.balign     16
-L(8byte_move):
-	movq	%r8,%rcx
-	shrq	$7,%rcx
-	jz	L(8byte_move_skip)
-
-	.p2align 4
-
-L(8byte_move_loop):
-	decq	%rcx
-
-	movq	%rdx,    (%rdi)
-	movq	%rdx,  8 (%rdi)
-	movq	%rdx, 16 (%rdi)
-	movq	%rdx, 24 (%rdi)
-	movq	%rdx, 32 (%rdi)
-	movq	%rdx, 40 (%rdi)
-	movq	%rdx, 48 (%rdi)
-	movq	%rdx, 56 (%rdi)
-	movq	%rdx, 64 (%rdi)
-	movq	%rdx, 72 (%rdi)
-	movq	%rdx, 80 (%rdi)
-	movq	%rdx, 88 (%rdi)
-	movq	%rdx, 96 (%rdi)
-	movq	%rdx, 104 (%rdi)
-	movq	%rdx, 112 (%rdi)
-	movq	%rdx, 120 (%rdi)
-
-	leaq	128 (%rdi),%rdi
-
-	jnz     L(8byte_move_loop)
-
-L(8byte_move_skip):
-	andl	$127,%r8d
-	lea    	(%rdi,%r8,1),%rdi
-
-#ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
-#else
-	lea    	L(Got0)(%rip),%r11
-	lea	L(setPxQx)(%rip),%rcx
-	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
-#endif
-
-	.balign     16
-L(8byte_stos_try):
-	mov    __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
-	cmpq	%r8,%r9		// calculate the lesser of remaining
-	cmovaq	%r8,%r9		// bytes and largest cache size
-	jbe	L(8byte_stos)
-
-L(8byte_move_reuse_try):
-	cmp	__STOS_UPPER_BOUNDARY,%r8
-	jae	L(8byte_move)
-
-	.balign     16
-L(8byte_stos):
-	movq	%r9,%rcx
-	andq	$-8,%r9
-
-	shrq	$3,%rcx
-	jz	L(8byte_stos_skip)
-
-	xchgq	%rax,%rdx
-
-	rep
-	stosq
-
-	xchgq	%rax,%rdx
-
-L(8byte_stos_skip):
-	subq	%r9,%r8
-	ja	L(8byte_nt_move)
-
-	andl	$7,%r8d
-	lea    	(%rdi,%r8,1),%rdi
-#ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
-#else
-	lea    	L(Got0)(%rip),%r11
-	lea     L(setPxQx)(%rip),%rcx
-	movswq	(%rcx,%r8,2),%rcx
-	lea	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
-#endif
 
-	.balign     16
-L(8byte_nt_move):
-	movq	%r8,%rcx
-	shrq	$7,%rcx
-	jz      L(8byte_nt_move_skip)
-
-	.balign     16
-L(8byte_nt_move_loop):
-	decq	%rcx
-
-	movntiq	%rdx,     (%rdi)
-	movntiq	%rdx,   8 (%rdi)
-	movntiq	%rdx,  16 (%rdi)
-	movntiq	%rdx,  24 (%rdi)
-	movntiq	%rdx,  32 (%rdi)
-	movntiq	%rdx,  40 (%rdi)
-	movntiq	%rdx,  48 (%rdi)
-	movntiq	%rdx,  56 (%rdi)
-	movntiq	%rdx,  64 (%rdi)
-	movntiq	%rdx,  72 (%rdi)
-	movntiq	%rdx,  80 (%rdi)
-	movntiq	%rdx,  88 (%rdi)
-	movntiq	%rdx,  96 (%rdi)
-	movntiq	%rdx, 104 (%rdi)
-	movntiq	%rdx, 112 (%rdi)
-	movntiq	%rdx, 120 (%rdi)
-
-	leaq	128 (%rdi),%rdi
-
-	jnz     L(8byte_nt_move_loop)
-
-	sfence
-
-L(8byte_nt_move_skip):
-	andl	$127,%r8d
-
-	lea    	(%rdi,%r8,1),%rdi
-#ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
-#else
-	lea    	L(Got0)(%rip),%r11
-	lea     L(setPxQx)(%rip),%rcx
-	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
-#endif
+#ifndef USE_MULTIARCH
+	jmp    L(aligned_now)
 
 L(SSE_pre):
+#endif
+#if !defined USE_MULTIARCH || defined USE_SSE2
 	 # fill RegXMM0 with the pattern
 	 movd   %rdx,%xmm0
 	 punpcklqdq %xmm0,%xmm0
@@ -1342,11 +1198,162 @@ L(SSExDx):
 	.short     L(SSE15QB)-L(SSE0Q0)
 #endif
 	.popsection
+#endif /* !defined USE_MULTIARCH || defined USE_SSE2  */
+
+	.balign     16
+L(aligned_now):
+
+#ifndef USE_MULTIARCH
+	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
+	 jg     L(SSE_pre)
+#endif /* USE_MULTIARCH */
+
+L(8byte_move_try):
+	cmpq	__STOS_LOWER_BOUNDARY,%r8
+	jae	L(8byte_stos_try)
+
+	.balign     16
+L(8byte_move):
+	movq	%r8,%rcx
+	shrq	$7,%rcx
+	jz	L(8byte_move_skip)
+
+	.p2align 4
+
+L(8byte_move_loop):
+	decq	%rcx
+
+	movq	%rdx,    (%rdi)
+	movq	%rdx,  8 (%rdi)
+	movq	%rdx, 16 (%rdi)
+	movq	%rdx, 24 (%rdi)
+	movq	%rdx, 32 (%rdi)
+	movq	%rdx, 40 (%rdi)
+	movq	%rdx, 48 (%rdi)
+	movq	%rdx, 56 (%rdi)
+	movq	%rdx, 64 (%rdi)
+	movq	%rdx, 72 (%rdi)
+	movq	%rdx, 80 (%rdi)
+	movq	%rdx, 88 (%rdi)
+	movq	%rdx, 96 (%rdi)
+	movq	%rdx, 104 (%rdi)
+	movq	%rdx, 112 (%rdi)
+	movq	%rdx, 120 (%rdi)
+
+	leaq	128 (%rdi),%rdi
+
+	jnz     L(8byte_move_loop)
+
+L(8byte_move_skip):
+	andl	$127,%r8d
+	lea    	(%rdi,%r8,1),%rdi
+
+#ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11
+	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+	lea    	L(Got0)(%rip),%r11
+	lea	L(setPxQx)(%rip),%rcx
+	movswq	(%rcx,%r8,2),%rcx
+	lea    	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
+
+	.balign     16
+L(8byte_stos_try):
+	mov    __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
+	cmpq	%r8,%r9		// calculate the lesser of remaining
+	cmovaq	%r8,%r9		// bytes and largest cache size
+	jbe	L(8byte_stos)
+
+L(8byte_move_reuse_try):
+	cmp	__STOS_UPPER_BOUNDARY,%r8
+	jae	L(8byte_move)
+
+	.balign     16
+L(8byte_stos):
+	movq	%r9,%rcx
+	andq	$-8,%r9
+
+	shrq	$3,%rcx
+	jz	L(8byte_stos_skip)
+
+	xchgq	%rax,%rdx
+
+	rep
+	stosq
+
+	xchgq	%rax,%rdx
+
+L(8byte_stos_skip):
+	subq	%r9,%r8
+	ja	L(8byte_nt_move)
+
+	andl	$7,%r8d
+	lea    	(%rdi,%r8,1),%rdi
+#ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11
+	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+	lea    	L(Got0)(%rip),%r11
+	lea     L(setPxQx)(%rip),%rcx
+	movswq	(%rcx,%r8,2),%rcx
+	lea	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
+
+	.balign     16
+L(8byte_nt_move):
+	movq	%r8,%rcx
+	shrq	$7,%rcx
+	jz      L(8byte_nt_move_skip)
+
+	.balign     16
+L(8byte_nt_move_loop):
+	decq	%rcx
+
+	movntiq	%rdx,     (%rdi)
+	movntiq	%rdx,   8 (%rdi)
+	movntiq	%rdx,  16 (%rdi)
+	movntiq	%rdx,  24 (%rdi)
+	movntiq	%rdx,  32 (%rdi)
+	movntiq	%rdx,  40 (%rdi)
+	movntiq	%rdx,  48 (%rdi)
+	movntiq	%rdx,  56 (%rdi)
+	movntiq	%rdx,  64 (%rdi)
+	movntiq	%rdx,  72 (%rdi)
+	movntiq	%rdx,  80 (%rdi)
+	movntiq	%rdx,  88 (%rdi)
+	movntiq	%rdx,  96 (%rdi)
+	movntiq	%rdx, 104 (%rdi)
+	movntiq	%rdx, 112 (%rdi)
+	movntiq	%rdx, 120 (%rdi)
+
+	leaq	128 (%rdi),%rdi
+
+	jnz     L(8byte_nt_move_loop)
+
+	sfence
+
+L(8byte_nt_move_skip):
+	andl	$127,%r8d
+
+	lea    	(%rdi,%r8,1),%rdi
+#ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11
+	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+	lea    	L(Got0)(%rip),%r11
+	lea     L(setPxQx)(%rip),%rcx
+	movswq	(%rcx,%r8,2),%rcx
+	lea    	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
 
 END (memset)
 libc_hidden_builtin_def (memset)
 
-#if defined PIC && !defined NOT_IN_libc
+#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
 strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
 	.section .gnu.warning.__memset_zero_constant_len_parameter
 	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 5d2e34e..19aa4be 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,8 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
-		   strncase_l-ssse3 strlen-sse4 strlen-no-bsf
+		   strncase_l-ssse3 strlen-sse4 strlen-no-bsf \
+		   memset-x86-64
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S
new file mode 100644
index 0000000..9c9eebd
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/bzero.S
@@ -0,0 +1,56 @@
+/* Multiple versions of bzero
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+	.text
+ENTRY(__bzero)
+	.type	__bzero, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__bzero_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__bzero_sse2(%rip), %rax
+2:	ret
+END(__bzero)
+
+	.type	__bzero_sse2, @function
+__bzero_sse2:
+	cfi_startproc
+	CALL_MCOUNT
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorl	%esi,%esi	/* Fill with 0s.  */
+	jmp	__memset_sse2
+	cfi_endproc
+	.size __bzero_sse2, .-__bzero_sse2
+
+	.type	__bzero_x86_64, @function
+__bzero_x86_64:
+	cfi_startproc
+	CALL_MCOUNT
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorl	%esi,%esi	/* Fill with 0s.  */
+	jmp	__memset_x86_64
+	cfi_endproc
+	.size __bzero_x86_64, .-__bzero_x86_64
+
+weak_alias (__bzero, bzero)
diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c
new file mode 100644
index 0000000..f87b8dc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/cacheinfo.c
@@ -0,0 +1,2 @@
+#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
+#include "../cacheinfo.c"
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 786466d..15bc904 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -59,6 +59,11 @@ __init_cpu_features (void)
 
       get_common_indeces (&family, &model);
 
+      /* Intel processors prefer SSE instruction for memory/string
+	 routines if they are avaiable.  */
+      __cpu_features.feature[index_Prefer_SSE_for_memop]
+	|= bit_Prefer_SSE_for_memop;
+
       unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
       unsigned int extended_family = (eax >> 20) & 0xff;
       unsigned int extended_model = (eax >> 12) & 0xf0;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 783b020..6e409b8 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -16,9 +16,10 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#define bit_Fast_Rep_String	(1 << 0)
-#define bit_Fast_Copy_Backward	(1 << 1)
-#define bit_Slow_BSF		(1 << 2)
+#define bit_Fast_Rep_String		(1 << 0)
+#define bit_Fast_Copy_Backward		(1 << 1)
+#define bit_Slow_BSF			(1 << 2)
+#define bit_Prefer_SSE_for_memop	(1 << 3)
 
 #ifdef	__ASSEMBLER__
 
@@ -37,6 +38,7 @@
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -109,6 +111,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 
 #define HAS_ARCH_FEATURE(idx, bit) \
   ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -122,4 +125,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define HAS_SLOW_BSF \
   HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
 
+#define HAS_PREFER_SSE_FOR_MEMOP \
+  HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memset-x86-64.S b/sysdeps/x86_64/multiarch/memset-x86-64.S
new file mode 100644
index 0000000..5e8cfb3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-x86-64.S
@@ -0,0 +1,18 @@
+#include <sysdep.h>
+
+#ifndef NOT_IN_libc
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __memset_chk_x86_64, @function; \
+	.globl __memset_chk_x86_64; \
+	.p2align 4; \
+	__memset_chk_x86_64: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
+
+# define libc_hidden_builtin_def(name)
+# define memset __memset_x86_64
+# include "../memset.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
new file mode 100644
index 0000000..a8d0e9e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -0,0 +1,74 @@
+/* Multiple versions of memset
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib.  */
+#ifndef NOT_IN_libc
+ENTRY(memset)
+	.type	memset, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__memset_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__memset_sse2(%rip), %rax
+2:	ret
+END(memset)
+
+# define USE_SSE2 1
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __memset_sse2, @function; \
+	.globl __memset_sse2; \
+	.p2align 4; \
+	__memset_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __memset_sse2, .-__memset_sse2
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __memset_chk_sse2, @function; \
+	.globl __memset_chk_sse2; \
+	.p2align 4; \
+	__memset_chk_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
+
+# ifdef SHARED
+#  undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal memset calls through a PLT.
+   The speedup we get from using GPR instruction is likely eaten away
+   by the indirect call in the PLT.  */
+#  define libc_hidden_builtin_def(name) \
+	.globl __GI_memset; __GI_memset = __memset_sse2
+# endif
+
+# undef strong_alias
+# define strong_alias(original, alias)
+#endif
+
+#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
new file mode 100644
index 0000000..16afe60
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -0,0 +1,44 @@
+/* Multiple versions of __memset_chk
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib.  */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ENTRY(__memset_chk)
+	.type	__memset_chk, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__memset_chk_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__memset_chk_sse2(%rip), %rax
+2:	ret
+END(__memset_chk)
+
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+	.section .gnu.warning.__memset_zero_constant_len_parameter
+	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+# else
+#  include "../memset_chk.S"
+# endif
+#endif

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                |   18 ++
 sysdeps/x86_64/memset.S                  |  311 +++++++++++++++---------------
 sysdeps/x86_64/multiarch/Makefile        |    3 +-
 sysdeps/x86_64/multiarch/bzero.S         |   56 ++++++
 sysdeps/x86_64/multiarch/cacheinfo.c     |    2 +
 sysdeps/x86_64/multiarch/init-arch.c     |    5 +
 sysdeps/x86_64/multiarch/init-arch.h     |   12 +-
 sysdeps/x86_64/multiarch/memset-x86-64.S |   18 ++
 sysdeps/x86_64/multiarch/memset.S        |   74 +++++++
 sysdeps/x86_64/multiarch/memset_chk.S    |   44 +++++
 10 files changed, 387 insertions(+), 156 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/bzero.S
 create mode 100644 sysdeps/x86_64/multiarch/cacheinfo.c
 create mode 100644 sysdeps/x86_64/multiarch/memset-x86-64.S
 create mode 100644 sysdeps/x86_64/multiarch/memset.S
 create mode 100644 sysdeps/x86_64/multiarch/memset_chk.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]