This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/benchtests/master created. glibc-2.23-159-g2e12444
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 4 Apr 2016 14:02:21 -0000
- Subject: GNU C Library master sources branch hjl/benchtests/master created. glibc-2.23-159-g2e12444
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/benchtests/master has been created
at 2e124448475bebf7a180cdb03ce27e0461bb02e4 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2e124448475bebf7a180cdb03ce27e0461bb02e4
commit 2e124448475bebf7a180cdb03ce27e0461bb02e4
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Apr 4 05:45:31 2016 -0700
Force 32-bit displacement in memmove-vec-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 4589c24..2bc61d1 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -84,8 +84,6 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
L(start):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
@@ -158,8 +156,6 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
L(start_erms):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(movsb_more_2x_vec)
L(last_2x_vec):
@@ -191,13 +187,18 @@ L(nop):
ret
.p2align 4
+L(more_2x_vec):
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ jae L(large_data)
+ jmp L(start_more_2x_vec)
+
+ .p2align 4
L(movsb_more_2x_vec):
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ jae L(large_data)
cmpq $REP_MOVSB_THRESHOLD, %rdx
- /* Force 32-bit displacement to avoid long nop between
- instructions. */
- ja.d32 L(movsb)
- .p2align 4
-L(more_2x_vec):
+ ja L(movsb)
+L(start_more_2x_vec):
/* More than 2 * VEC. */
cmpq %rsi, %rdi
jb L(copy_forward)
@@ -210,7 +211,13 @@ L(copy_forward):
leaq (%rdi,%rdx), %rcx
cmpq %rcx, %rsi
jb L(more_2x_vec_overlap)
+# if VEC_SIZE == 64
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ VMOVU.d32 (%rsi), %VEC(0)
+# else
VMOVU (%rsi), %VEC(0)
+# endif
VMOVU VEC_SIZE(%rsi), %VEC(1)
VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2)
VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
@@ -219,9 +226,7 @@ L(copy_forward):
VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
cmpq $(VEC_SIZE * 4), %rdx
- /* Force 32-bit displacement to avoid long nop between
- instructions. */
- jbe.d32 L(return)
+ jbe L(return)
VMOVU (VEC_SIZE * 2)(%rsi), %VEC(0)
VMOVU (VEC_SIZE * 3)(%rsi), %VEC(1)
VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(2)
@@ -231,13 +236,7 @@ L(copy_forward):
VMOVU %VEC(2), -(VEC_SIZE * 3)(%rdi,%rdx)
VMOVU %VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
cmpq $(VEC_SIZE * 8), %rdx
-# if VEC_SIZE == 16
jbe L(return)
-# else
- /* Use 8-bit displacement to avoid long nop between
- instructions. */
- jbe L(return_disp8)
-# endif
leaq (VEC_SIZE * 4)(%rdi), %rcx
addq %rdi, %rdx
andq $-(VEC_SIZE * 4), %rdx
@@ -246,14 +245,19 @@ L(copy_forward):
subq %rdi, %r11
addq %r11, %rsi
cmpq %rdx, %rcx
- /* Use 8-bit displacement to avoid long nop between
- instructions. */
- je L(return_disp8)
+ je L(return)
movq %rsi, %r10
subq %rcx, %r10
- leaq VEC_SIZE(%r10), %r9
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ leaq.d32 VEC_SIZE(%r10), %r9
+# if VEC_SIZE == 16
+ leaq.d32 (VEC_SIZE * 2)(%r10), %r8
+ leaq.d32 (VEC_SIZE * 3)(%r10), %r11
+# else
leaq (VEC_SIZE * 2)(%r10), %r8
leaq (VEC_SIZE * 3)(%r10), %r11
+# endif
.p2align 4
L(loop):
VMOVU (%rcx,%r10), %VEC(0)
@@ -267,7 +271,6 @@ L(loop):
addq $(VEC_SIZE * 4), %rcx
cmpq %rcx, %rdx
jne L(loop)
-L(return_disp8):
VZEROUPPER
ret
L(less_vec):
@@ -424,8 +427,10 @@ L(loop_8x_vec_forward):
.p2align 4
L(more_8x_vec_backward):
- leaq -VEC_SIZE(%rsi, %rdx), %rcx
- leaq -VEC_SIZE(%rdi, %rdx), %r9
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ leaq.d32 -VEC_SIZE(%rsi, %rdx), %rcx
+ leaq.d32 -VEC_SIZE(%rdi, %rdx), %r9
.p2align 4
L(loop_8x_vec_backward):
@@ -466,13 +471,15 @@ L(large_data):
je L(nop)
/* Align destination for access with non-temporal stores in the
loop. Compute how much destination is misaligned. */
- movl %edi, %ecx
+ movq %rdi, %rcx
movq %rdi, %r9
- andl $(VEC_SIZE - 1), %ecx
- /* Load the first VEC and store it at the end. */
- VMOVU (%rsi), %VEC(4)
- /* Skip if destination is aligned. */
- jz 1f
+ andq $(VEC_SIZE - 1), %rcx
+ /* Load the first VEC and store it at the end. Force 32-bit
+ displacement to avoid long nop between instructions. */
+ VMOVU.d32 (%rsi), %VEC(4)
+ /* Skip if destination is aligned. Force 32-bit displacement
+ to avoid long nop between instructions. */
+ jz.d32 1f
/* Get the negative of offset for alignment. */
subq $VEC_SIZE, %rcx
/* Adjust source. */
@@ -482,7 +489,9 @@ L(large_data):
/* Adjust length. */
addq %rcx, %rdx
1:
- leaq (%rdi,%rdx), %rcx
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ leaq.d32 (%rdi,%rdx), %rcx
.p2align 4
L(loop_large_forward):
/* Copy 4 * VEC a time forward with non-temporal stores. */
@@ -514,16 +523,25 @@ L(loop_large_forward):
.p2align 4
L(copy_large_backward):
- leaq -VEC_SIZE(%rdi, %rdx), %r9
- leaq -VEC_SIZE(%rsi, %rdx), %rcx
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ leaq.d32 -VEC_SIZE(%rdi, %rdx), %r9
+ leaq.d32 -VEC_SIZE(%rsi, %rdx), %rcx
movl %r9d, %r10d
movq %r9, %r11
/* Compute how much the end of destination is misaligned. */
andl $(VEC_SIZE - 1), %r10d
- /* Load the last VEC and store it at the end. */
- VMOVU (%rcx), %VEC(4)
+ /* Load the last VEC and store it at the end. Force 32-bit
+ displacement to avoid long nop between instructions. */
+ VMOVU.d32 (%rcx), %VEC(4)
/* Skip if the end of destination is aligned. */
+# if VEC_SIZE == 16
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ jz.d32 L(loop_large_backward)
+# else
jz L(loop_large_backward)
+# endif
/* Adjust source. */
subq %r10, %rcx
/* Adjust the end of destination which should be aligned now. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0b0bd5485f738c2d6b9dfec43a0c33df62c96f64
commit 0b0bd5485f738c2d6b9dfec43a0c33df62c96f64
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Apr 4 05:54:44 2016 -0700
Force 32-bit displacement in memset-vec-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9383517..1745a71 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -159,9 +159,21 @@ L(return):
.p2align 4
L(loop_start):
leaq (VEC_SIZE * 4)(%rdi), %rcx
+# if VEC_SIZE == 32 || VEC_SIZE == 64
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ VMOVU.d32 %VEC(0), (%rdi)
+# else
VMOVU %VEC(0), (%rdi)
+# endif
andq $-(VEC_SIZE * 4), %rcx
+# if VEC_SIZE == 32 || VEC_SIZE == 64
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ VMOVU.d32 %VEC(0), -VEC_SIZE(%rdi,%rdx)
+# else
VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+# endif
VMOVU %VEC(0), VEC_SIZE(%rdi)
VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5c201f7928a8557b8d9127f91a5e319c37738228
commit 5c201f7928a8557b8d9127f91a5e319c37738228
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Apr 4 04:52:36 2016 -0700
Add a comment in memset-sse2-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index 2deba42..4bf3d36 100644
--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -1,6 +1,8 @@
#if IS_IN (libc)
# define VEC_SIZE 16
# define VEC(i) xmm##i
+/* Don't use movups and movaps since it will get larger nop paddings
+ for alignment. */
# define VMOVU movdqu
# define VMOVA movdqa
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3b2e3a74f6eeda71339c0feb18a15c54b97913b1
commit 3b2e3a74f6eeda71339c0feb18a15c54b97913b1
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 3 21:19:45 2016 -0700
Add __x86_shared_non_temporal_threshold
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index 96463df..cae9907 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -464,6 +464,10 @@ long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded. */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
+/* Threshold to use non temporal stores. */
+long int __x86_shared_non_temporal_threshold attribute_hidden
+ = 1024 * 1024 * 4;
+
#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines. */
int __x86_prefetchw attribute_hidden;
@@ -661,5 +665,6 @@ init_cacheinfo (void)
shared = shared & ~255L;
__x86_shared_cache_size_half = shared / 2;
__x86_shared_cache_size = shared;
+ __x86_shared_non_temporal_threshold = shared * 4;
}
}
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 41e8232..4589c24 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -84,7 +84,7 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
L(start):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
- cmpq __x86_shared_cache_size_half(%rip), %rdx
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
@@ -158,7 +158,7 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
L(start_erms):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
- cmpq __x86_shared_cache_size_half(%rip), %rdx
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(movsb_more_2x_vec)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=710404b401127acc5647b77ce7981ff62fbbaf1c
commit 710404b401127acc5647b77ce7981ff62fbbaf1c
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 3 21:06:30 2016 -0700
Copy very large data with non-temporal stores
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
index 44711c3..126716e 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -1,6 +1,8 @@
#if IS_IN (libc)
# define VEC_SIZE 32
# define VEC(i) ymm##i
+# define PREFETCHNT prefetchnta
+# define VMOVNTU vmovntdq
# define VMOVU vmovdqu
# define VMOVA vmovdqa
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
index c2c5293..6590144 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -1,6 +1,8 @@
#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc)
# define VEC_SIZE 64
# define VEC(i) zmm##i
+# define PREFETCHNT prefetchnta
+# define VMOVNTU vmovntdq
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 85214fe..f7b8013 100644
--- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -1,8 +1,11 @@
#if IS_IN (libc)
# define VEC_SIZE 16
# define VEC(i) xmm##i
-# define VMOVU movdqu
-# define VMOVA movdqa
+# define PREFETCHNT prefetchnta
+# define VMOVNTU movntdq
+/* Use movups and movaps for smaller code sizes. */
+# define VMOVU movups
+# define VMOVA movaps
# define SECTION(p) p
# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index bb9773f..41e8232 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -84,11 +84,7 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
L(start):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
-#if 0
cmpq __x86_shared_cache_size_half(%rip), %rdx
-#else
- cmpq $512, %rdx
-#endif
jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
@@ -407,6 +403,8 @@ L(loop_8x_vec_forward):
VMOVU (VEC_SIZE * 5)(%rsi), %VEC(5)
VMOVU (VEC_SIZE * 6)(%rsi), %VEC(6)
VMOVU (VEC_SIZE * 7)(%rsi), %VEC(7)
+ addq $(VEC_SIZE * 8), %rsi
+ subq $(VEC_SIZE * 8), %rdx
VMOVU %VEC(0), (%rdi)
VMOVU %VEC(1), VEC_SIZE(%rdi)
VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
@@ -416,8 +414,6 @@ L(loop_8x_vec_forward):
VMOVU %VEC(6), (VEC_SIZE * 6)(%rdi)
VMOVU %VEC(7), (VEC_SIZE * 7)(%rdi)
addq $(VEC_SIZE * 8), %rdi
- addq $(VEC_SIZE * 8), %rsi
- subq $(VEC_SIZE * 8), %rdx
cmpq $(VEC_SIZE * 8), %rdx
je L(between_4x_vec_and_8x_vec)
ja L(loop_8x_vec_forward)
@@ -442,6 +438,8 @@ L(loop_8x_vec_backward):
VMOVU -(VEC_SIZE * 5)(%rcx), %VEC(5)
VMOVU -(VEC_SIZE * 6)(%rcx), %VEC(6)
VMOVU -(VEC_SIZE * 7)(%rcx), %VEC(7)
+ subq $(VEC_SIZE * 8), %rcx
+ subq $(VEC_SIZE * 8), %rdx
VMOVU %VEC(0), (%r9)
VMOVU %VEC(1), -VEC_SIZE(%r9)
VMOVU %VEC(2), -(VEC_SIZE * 2)(%r9)
@@ -450,9 +448,7 @@ L(loop_8x_vec_backward):
VMOVU %VEC(5), -(VEC_SIZE * 5)(%r9)
VMOVU %VEC(6), -(VEC_SIZE * 6)(%r9)
VMOVU %VEC(7), -(VEC_SIZE * 7)(%r9)
- subq $(VEC_SIZE * 8), %rcx
subq $(VEC_SIZE * 8), %r9
- subq $(VEC_SIZE * 8), %rdx
cmpq $(VEC_SIZE * 8), %rdx
je L(between_4x_vec_and_8x_vec)
ja L(loop_8x_vec_backward)
@@ -464,29 +460,51 @@ L(loop_8x_vec_backward):
.p2align 4
L(large_data):
/* Copy very large data with non-temporal stores. */
- leaq (%rdi,%rdx), %rcx
cmpq %rsi, %rdi
ja L(copy_large_backward)
/* Source == destination is less common. */
je L(nop)
+ /* Align destination for access with non-temporal stores in the
+ loop. Compute how much destination is misaligned. */
+ movl %edi, %ecx
+ movq %rdi, %r9
+ andl $(VEC_SIZE - 1), %ecx
+ /* Load the first VEC and store it at the end. */
+ VMOVU (%rsi), %VEC(4)
+ /* Skip if destination is aligned. */
+ jz 1f
+ /* Get the negative of offset for alignment. */
+ subq $VEC_SIZE, %rcx
+ /* Adjust source. */
+ subq %rcx, %rsi
+ /* Adjust destination which should be aligned now. */
+ subq %rcx, %rdi
+ /* Adjust length. */
+ addq %rcx, %rdx
+1:
+ leaq (%rdi,%rdx), %rcx
+ .p2align 4
L(loop_large_forward):
/* Copy 4 * VEC a time forward with non-temporal stores. */
- prefetchnta (VEC_SIZE * 14)(%rsi)
- prefetchnta (VEC_SIZE * 20)(%rsi)
+ PREFETCHNT (VEC_SIZE * 14)(%rsi)
+ PREFETCHNT (VEC_SIZE * 20)(%rsi)
VMOVU (%rsi), %VEC(0)
VMOVU VEC_SIZE(%rsi), %VEC(1)
VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), VEC_SIZE(%rdi)
- VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
- addq $(VEC_SIZE * 4), %rdi
addq $(VEC_SIZE * 4), %rsi
subq $(VEC_SIZE * 4), %rdx
+ VMOVNTU %VEC(0), (%rdi)
+ VMOVNTU %VEC(1), VEC_SIZE(%rdi)
+ VMOVNTU %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVNTU %VEC(3), (VEC_SIZE * 3)(%rdi)
+ addq $(VEC_SIZE * 4), %rdi
cmpq $(VEC_SIZE * 4), %rdx
- je L(last_4x_vec)
ja L(loop_large_forward)
+ sfence
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r9)
+ je L(last_4x_vec)
/* Less than 4 * VEC to copy. */
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
@@ -496,27 +514,44 @@ L(loop_large_forward):
.p2align 4
L(copy_large_backward):
- leaq -VEC_SIZE(%rsi, %rdx), %rcx
leaq -VEC_SIZE(%rdi, %rdx), %r9
+ leaq -VEC_SIZE(%rsi, %rdx), %rcx
+ movl %r9d, %r10d
+ movq %r9, %r11
+ /* Compute how much the end of destination is misaligned. */
+ andl $(VEC_SIZE - 1), %r10d
+ /* Load the last VEC and store it at the end. */
+ VMOVU (%rcx), %VEC(4)
+ /* Skip if the end of destination is aligned. */
+ jz L(loop_large_backward)
+ /* Adjust source. */
+ subq %r10, %rcx
+ /* Adjust the end of destination which should be aligned now. */
+ subq %r10, %r9
+ /* Adjust length. */
+ subq %r10, %rdx
.p2align 4
L(loop_large_backward):
/* Copy 4 * VEC a time backward with non-temporal stores. */
- prefetchnta -(VEC_SIZE * 14)(%rsi)
- prefetchnta -(VEC_SIZE * 20)(%rsi)
+ PREFETCHNT -(VEC_SIZE * 14)(%rcx)
+ PREFETCHNT -(VEC_SIZE * 20)(%rcx)
VMOVU (%rcx), %VEC(0)
VMOVU -VEC_SIZE(%rcx), %VEC(1)
VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
- VMOVU %VEC(0), (%r9)
- VMOVU %VEC(1), -VEC_SIZE(%r9)
- VMOVU %VEC(2), -(VEC_SIZE * 2)(%r9)
- VMOVU %VEC(3), -(VEC_SIZE * 3)(%r9)
subq $(VEC_SIZE * 4), %rcx
- subq $(VEC_SIZE * 4), %r9
subq $(VEC_SIZE * 4), %rdx
+ VMOVNTU %VEC(0), (%r9)
+ VMOVNTU %VEC(1), -VEC_SIZE(%r9)
+ VMOVNTU %VEC(2), -(VEC_SIZE * 2)(%r9)
+ VMOVNTU %VEC(3), -(VEC_SIZE * 3)(%r9)
+ subq $(VEC_SIZE * 4), %r9
cmpq $(VEC_SIZE * 4), %rdx
- je L(last_4x_vec)
ja L(loop_large_backward)
+ sfence
+ /* Store the last VEC. */
+ VMOVU %VEC(4), (%r11)
+ je L(last_4x_vec)
/* Less than 4 * VEC to copy. */
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e4b3d75451a764184f4f256e71dcf2f49907b181
commit e4b3d75451a764184f4f256e71dcf2f49907b181
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 3 17:21:45 2016 -0700
Add large_data
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 66779a3..bb9773f 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -84,6 +84,12 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
L(start):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
+#if 0
+ cmpq __x86_shared_cache_size_half(%rip), %rdx
+#else
+ cmpq $512, %rdx
+#endif
+ jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
@@ -156,6 +162,8 @@ ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
L(start_erms):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
+ cmpq __x86_shared_cache_size_half(%rip), %rdx
+ jae L(large_data)
cmpq $(VEC_SIZE * 2), %rdx
ja L(movsb_more_2x_vec)
L(last_2x_vec):
@@ -452,6 +460,69 @@ L(loop_8x_vec_backward):
cmpq $(VEC_SIZE * 4), %rdx
jb L(between_0_and_4x_vec)
jmp L(between_4x_vec_and_8x_vec)
+
+ .p2align 4
+L(large_data):
+ /* Copy very large data with non-temporal stores. */
+ leaq (%rdi,%rdx), %rcx
+ cmpq %rsi, %rdi
+ ja L(copy_large_backward)
+ /* Source == destination is less common. */
+ je L(nop)
+L(loop_large_forward):
+ /* Copy 4 * VEC a time forward with non-temporal stores. */
+ prefetchnta (VEC_SIZE * 14)(%rsi)
+ prefetchnta (VEC_SIZE * 20)(%rsi)
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
+ addq $(VEC_SIZE * 4), %rdi
+ addq $(VEC_SIZE * 4), %rsi
+ subq $(VEC_SIZE * 4), %rdx
+ cmpq $(VEC_SIZE * 4), %rdx
+ je L(last_4x_vec)
+ ja L(loop_large_forward)
+ /* Less than 4 * VEC to copy. */
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_2x_vec)
+ jmp L(last_4x_vec)
+
+ .p2align 4
+L(copy_large_backward):
+ leaq -VEC_SIZE(%rsi, %rdx), %rcx
+ leaq -VEC_SIZE(%rdi, %rdx), %r9
+ .p2align 4
+L(loop_large_backward):
+ /* Copy 4 * VEC a time backward with non-temporal stores. */
+ prefetchnta -(VEC_SIZE * 14)(%rsi)
+ prefetchnta -(VEC_SIZE * 20)(%rsi)
+ VMOVU (%rcx), %VEC(0)
+ VMOVU -VEC_SIZE(%rcx), %VEC(1)
+ VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
+ VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
+ VMOVU %VEC(0), (%r9)
+ VMOVU %VEC(1), -VEC_SIZE(%r9)
+ VMOVU %VEC(2), -(VEC_SIZE * 2)(%r9)
+ VMOVU %VEC(3), -(VEC_SIZE * 3)(%r9)
+ subq $(VEC_SIZE * 4), %rcx
+ subq $(VEC_SIZE * 4), %r9
+ subq $(VEC_SIZE * 4), %rdx
+ cmpq $(VEC_SIZE * 4), %rdx
+ je L(last_4x_vec)
+ ja L(loop_large_backward)
+ /* Less than 4 * VEC to copy. */
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_2x_vec)
+ jmp L(last_4x_vec)
END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
# ifdef SHARED
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4c12d12e316f92a9c5d28529f4a566417375a512
commit 4c12d12e316f92a9c5d28529f4a566417375a512
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed Mar 30 09:18:27 2016 -0700
Add --enable-large-benchtests for large data benchmarks
We like to run memcpy memmove and memset benchmarks with large data sizes.
This patch adds --enable-large-benchtests to enable benchmarks with very
large data.
* benchtests/Makefile (string-benchset): Add memcpy-large,
memmove-large and memset-large for --enable-large-benchtests.
* benchtests/bench-memcpy-large.c: New file.
* benchtests/bench-memmove-large.c: Likewise.
* benchtests/bench-memset-large.c: Likewise.
* benchtests/bench-string.h (TIMEOUT): Don't redefine.
* config.make.in (run-large-benchtests): New.
* configure.ac: Add --enable-large-benchtests.
* configure: Regenerated.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index a37d666..7f8ae02 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -39,6 +39,9 @@ string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
strcoll
+ifeq (yes,$(run-large-benchtests))
+string-benchset += memcpy-large memmove-large memset-large
+endif
wcsmbs-benchset := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \
wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \
wmemchr wmemset wmemcmp
diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
new file mode 100644
index 0000000..470000f
--- /dev/null
+++ b/benchtests/bench-memcpy-large.c
@@ -0,0 +1,105 @@
+/* Measure memcpy functions with large data sizes.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef MEMCPY_RESULT
+# define MEMCPY_RESULT(dst, len) dst
+# define BASE_PAGE_SIZE (1024 * 1024)
+# define MIN_PAGE_SIZE (getpagesize () + 256 * BASE_PAGE_SIZE)
+# define TEST_MAIN
+# define TEST_NAME "memcpy"
+# define TIMEOUT (20 * 60)
+# include "bench-string.h"
+
+IMPL (memcpy, 1)
+#endif
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+
+static void
+do_one_test (impl_t *impl, char *dst, const char *src,
+ size_t len)
+{
+ size_t i, iters = 16;
+ timing_t start, stop, cur;
+
+ /* It is too slow to check result with a simple implementation. */
+
+ TIMING_NOW (start);
+ for (i = 0; i < iters; ++i)
+ {
+ CALL (impl, dst, src, len);
+ }
+ TIMING_NOW (stop);
+
+ TIMING_DIFF (cur, start, stop);
+
+ TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len)
+{
+ size_t i, j;
+ char *s1, *s2;
+
+ align1 &= 63;
+ if (align1 + len >= page_size)
+ return;
+
+ align2 &= 63;
+ if (align2 + len >= page_size)
+ return;
+
+ s1 = (char *) (buf1 + align1);
+ s2 = (char *) (buf2 + align2);
+
+ for (i = 0, j = 1; i < len; i++, j += 23)
+ s1[i] = j;
+
+ printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, s2, s1, len);
+
+ putchar ('\n');
+}
+
+int
+test_main (void)
+{
+ size_t i;
+
+ test_init ();
+
+ printf ("%23s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ for (i = BASE_PAGE_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
+ {
+ do_test (0, 0, i);
+ do_test (0, 1, i);
+ do_test (1, 0, i);
+ do_test (1, 1, i);
+ }
+
+ return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
new file mode 100644
index 0000000..585b94b
--- /dev/null
+++ b/benchtests/bench-memmove-large.c
@@ -0,0 +1,103 @@
+/* Measure memmove functions with large data sizes.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define BASE_PAGE_SIZE (1024 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 256 * BASE_PAGE_SIZE)
+#define TEST_MAIN
+#define TEST_NAME "memmove"
+#define TIMEOUT (20 * 60)
+#include "bench-string.h"
+
+IMPL (memmove, 1)
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+
+static void
+do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
+ size_t len)
+{
+ size_t i, iters = 16;
+ timing_t start, stop, cur;
+
+ /* It is too slow to check result with a simple implementation. */
+
+ TIMING_NOW (start);
+ for (i = 0; i < iters; ++i)
+ {
+ CALL (impl, dst, src, len);
+ }
+ TIMING_NOW (stop);
+
+ TIMING_DIFF (cur, start, stop);
+
+ TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len)
+{
+ size_t i, j;
+ char *s1, *s2;
+
+ align1 &= 127;
+ if (align1 + len >= page_size)
+ return;
+
+ align2 &= 127;
+ if (align2 + len >= page_size)
+ return;
+
+ s1 = (char *) (buf1 + align1);
+ s2 = (char *) (buf2 + align2);
+
+ for (i = 0, j = 1; i < len; i++, j += 23)
+ s1[i] = j;
+
+ printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, s2, (char *) (buf2 + align1), s1, len);
+
+ putchar ('\n');
+}
+
+int
+test_main (void)
+{
+ size_t i;
+
+ test_init ();
+
+ printf ("%23s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ for (i = BASE_PAGE_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
+ {
+ do_test (0, 64, i);
+ do_test (0, 1, i);
+ do_test (1, 0, i);
+ do_test (1, 2, i);
+ do_test (6, 3, i);
+ }
+
+ return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
new file mode 100644
index 0000000..acb920e
--- /dev/null
+++ b/benchtests/bench-memset-large.c
@@ -0,0 +1,103 @@
+/* Measure memset functions with large data sizes.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#ifndef WIDE
+# define TEST_NAME "memset"
+#else
+# define TEST_NAME "wmemset"
+#endif /* WIDE */
+#define BASE_PAGE_SIZE (1024 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 256 * BASE_PAGE_SIZE)
+#define TIMEOUT (20 * 60)
+#include "bench-string.h"
+
+#ifndef WIDE
+# define MEMSET memset
+# define CHAR char
+# define MEMCMP memcmp
+#else
+# include <wchar.h>
+# define MEMSET wmemset
+# define CHAR wchar_t
+# define MEMCMP wmemcmp
+#endif /* WIDE */
+
+IMPL (MEMSET, 1)
+
+typedef CHAR *(*proto_t) (CHAR *, int, size_t);
+
+static void
+do_one_test (impl_t *impl, CHAR *s, int c __attribute ((unused)), size_t n)
+{
+ size_t i, iters = 16;
+ timing_t start, stop, cur;
+
+ /* It is too slow to check result with a simple implementation. */
+
+ TIMING_NOW (start);
+ for (i = 0; i < iters; ++i)
+ {
+ CALL (impl, s, c, n);
+ }
+ TIMING_NOW (stop);
+
+ TIMING_DIFF (cur, start, stop);
+
+ TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+static void
+do_test (size_t align, int c, size_t len)
+{
+ align &= 63;
+ if ((align + len) * sizeof (CHAR) > page_size)
+ return;
+
+ printf ("Length %4zd, alignment %2zd, c %2d:", len, align, c);
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, (CHAR *) (buf1) + align, c, len);
+
+ putchar ('\n');
+}
+
+int
+test_main (void)
+{
+ size_t i;
+ int c;
+
+ test_init ();
+
+ printf ("%24s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ c = 65;
+ for (i = BASE_PAGE_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
+ {
+ do_test (0, c, i);
+ do_test (1, c, i);
+ }
+
+ return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h
index be4c618..9c5371e 100644
--- a/benchtests/bench-string.h
+++ b/benchtests/bench-string.h
@@ -56,7 +56,9 @@ extern impl_t __start_impls[], __stop_impls[];
# define TEST_FUNCTION test_main ()
-# define TIMEOUT (4 * 60)
+# ifndef TIMEOUT
+# define TIMEOUT (4 * 60)
+# endif
# define OPT_ITERATIONS 10000
# define OPT_RANDOM 10001
# define OPT_SEED 10002
diff --git a/config.make.in b/config.make.in
index 95c6f36..b9a4dbb 100644
--- a/config.make.in
+++ b/config.make.in
@@ -89,6 +89,7 @@ link-obsolete-rpc = @link_obsolete_rpc@
build-nscd = @build_nscd@
use-nscd = @use_nscd@
build-hardcoded-path-in-tests= @hardcoded_path_in_tests@
+run-large-benchtests = @large_benchtests@
build-pt-chown = @build_pt_chown@
enable-lock-elision = @enable_lock_elision@
diff --git a/configure b/configure
index 8fe5937..42bde65 100755
--- a/configure
+++ b/configure
@@ -668,6 +668,7 @@ all_warnings
force_install
bindnow
enable_lock_elision
+large_benchtests
hardcoded_path_in_tests
enable_timezone_tools
use_default_link
@@ -755,6 +756,7 @@ enable_shared
enable_profile
enable_timezone_tools
enable_hardcoded_path_in_tests
+enable_large_benchtests
enable_stackguard_randomization
enable_lock_elision
enable_add_ons
@@ -1411,6 +1413,8 @@ Optional Features:
--enable-hardcoded-path-in-tests
hardcode newly built glibc path in tests
[default=no]
+ --enable-large-benchtests
+ run benchtests with large data size [default=no]
--enable-stackguard-randomization
initialize __stack_chk_guard canary with a random
number at program start
@@ -3363,6 +3367,15 @@ fi
+# Check whether --enable-large-benchtests was given.
+if test "${enable_large_benchtests+set}" = set; then :
+ enableval=$enable_large_benchtests; large_benchtests=$enableval
+else
+ large_benchtests=no
+fi
+
+
+
# Check whether --enable-stackguard-randomization was given.
if test "${enable_stackguard_randomization+set}" = set; then :
enableval=$enable_stackguard_randomization; enable_stackguard_randomize=$enableval
diff --git a/configure.ac b/configure.ac
index 3c766b7..8fb93d9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -190,6 +190,13 @@ AC_ARG_ENABLE([hardcoded-path-in-tests],
[hardcoded_path_in_tests=no])
AC_SUBST(hardcoded_path_in_tests)
+AC_ARG_ENABLE([large-benchtests],
+ AC_HELP_STRING([--enable-large-benchtests],
+ [run benchtests with large data size @<:@default=no@:>@]),
+ [large_benchtests=$enableval],
+ [large_benchtests=no])
+AC_SUBST(large_benchtests)
+
AC_ARG_ENABLE([stackguard-randomization],
AC_HELP_STRING([--enable-stackguard-randomization],
[initialize __stack_chk_guard canary with a random number at program start]),
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources