This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


GNU C Library master sources branch hjl/cacheline/master created. glibc-2.23-235-g9cdb7e7


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/cacheline/master has been created
        at  9cdb7e72b694274d76fce00a23b87efde2e0d28b (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9cdb7e72b694274d76fce00a23b87efde2e0d28b

commit 9cdb7e72b694274d76fce00a23b87efde2e0d28b
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun Apr 24 10:53:25 2016 -0700

    Align to cacheline

diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 0a2bf41..c140cb3 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -376,6 +376,27 @@ L(more_8x_vec):
 	subq	%r8, %rdi
 	/* Adjust length.  */
 	addq	%r8, %rdx
+#if CACHELINE_SIZE != VEC_SIZE
+	movl	%edi, %r8d
+	andl	$(CACHELINE_SIZE - 1), %r8d
+	je	L(loop_4x_vec_forward_pre)
+# if CACHELINE_SIZE == (VEC_SIZE * 4)
+	/* Cacheline misaligned by VEC_SIZE, 2 * VEC_SIZE, or
+	   3 * VEC_SIZE.  */
+	cmpl	$(VEC_SIZE * 2), %r8d
+	je	L(misaligned_by_2x_vec_forward)
+	jb	L(misaligned_by_3x_vec_forward)
+# elif CACHELINE_SIZE != (VEC_SIZE * 2)
+#  error Unsupported CACHELINE_SIZE!
+# endif
+	/* Cacheline misaligned by VEC_SIZE.  */
+	VMOVU	(%rsi), %VEC(0)
+	addq	$VEC_SIZE, %rsi
+	subq	$VEC_SIZE, %rdx
+	VMOVA	%VEC(0), (%rdi)
+	addq	$VEC_SIZE, %rdi
+#endif
+L(loop_4x_vec_forward_pre):
 #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
@@ -406,6 +427,32 @@ L(loop_4x_vec_forward):
 	VZEROUPPER
 	ret
 
+#if CACHELINE_SIZE == (VEC_SIZE * 4)
+L(misaligned_by_2x_vec_forward):
+	/* Cacheline misaligned by 2 * VEC_SIZE.  */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	addq	$(VEC_SIZE * 2), %rsi
+	subq	$(VEC_SIZE * 2), %rdx
+	VMOVA	%VEC(0), (%rdi)
+	VMOVA	%VEC(1), VEC_SIZE(%rdi)
+	addq	$(VEC_SIZE * 2), %rdi
+	jmp	L(loop_4x_vec_forward_pre)
+
+L(misaligned_by_3x_vec_forward):
+	/* Cacheline misaligned by 3 * VEC_SIZE.  */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(2)
+	addq	$(VEC_SIZE * 3), %rsi
+	subq	$(VEC_SIZE * 3), %rdx
+	VMOVA	%VEC(0), (%rdi)
+	VMOVA	%VEC(1), VEC_SIZE(%rdi)
+	VMOVA	%VEC(2), (VEC_SIZE * 2)(%rdi)
+	addq	$(VEC_SIZE * 3), %rdi
+	jmp	L(loop_4x_vec_forward_pre)
+#endif
+
 L(more_8x_vec_backward):
 	/* Load the first 4 * VEC and last VEC to support overlapping
 	   addresses.  */
@@ -428,6 +475,27 @@ L(more_8x_vec_backward):
 	subq	%r8, %r9
 	/* Adjust length.  */
 	subq	%r8, %rdx
+#if CACHELINE_SIZE != VEC_SIZE
+	movl	%r9d, %r8d
+	andl	$(CACHELINE_SIZE - 1), %r8d
+	je	L(loop_4x_vec_backward_pre)
+# if CACHELINE_SIZE == (VEC_SIZE * 4)
+	/* Cacheline misaligned by VEC_SIZE, 2 * VEC_SIZE, or
+	   3 * VEC_SIZE.  */
+	cmpl	$(VEC_SIZE * 2), %r8d
+	je	L(misaligned_by_2x_vec_backward)
+	jb	L(misaligned_by_3x_vec_backward)
+# elif CACHELINE_SIZE != (VEC_SIZE * 2)
+#  error Unsupported CACHELINE_SIZE!
+# endif
+	/* Cacheline misaligned by VEC_SIZE.  */
+	VMOVU	(%rcx), %VEC(0)
+	subq	$VEC_SIZE, %rcx
+	subq	$VEC_SIZE, %rdx
+	VMOVA	%VEC(0), (%r9)
+	subq	$VEC_SIZE, %r9
+#endif
+L(loop_4x_vec_backward_pre):
 #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
@@ -458,6 +526,32 @@ L(loop_4x_vec_backward):
 	VZEROUPPER
 	ret
 
+#if CACHELINE_SIZE == (VEC_SIZE * 4)
+L(misaligned_by_2x_vec_backward):
+	/* Cacheline misaligned by 2 * VEC_SIZE.  */
+	VMOVU	(%rcx), %VEC(0)
+	VMOVU	-VEC_SIZE(%rcx), %VEC(1)
+	subq	$(VEC_SIZE * 2), %rcx
+	subq	$(VEC_SIZE * 2), %rdx
+	VMOVA	%VEC(0), (%r9)
+	VMOVA	%VEC(1), -VEC_SIZE(%r9)
+	subq	$(VEC_SIZE * 2), %r9
+	jmp	L(loop_4x_vec_backward_pre)
+
+L(misaligned_by_3x_vec_backward):
+	/* Cacheline misaligned by 3 * VEC_SIZE.  */
+	VMOVU	(%rcx), %VEC(0)
+	VMOVU	-VEC_SIZE(%rcx), %VEC(1)
+	VMOVU	-(VEC_SIZE * 2)(%rcx), %VEC(2)
+	subq	$(VEC_SIZE * 3), %rcx
+	subq	$(VEC_SIZE * 3), %rdx
+	VMOVA	%VEC(0), (%r9)
+	VMOVA	%VEC(1), -VEC_SIZE(%r9)
+	VMOVA	%VEC(2), -(VEC_SIZE * 2)(%r9)
+	subq	$(VEC_SIZE * 3), %r9
+	jmp	L(loop_4x_vec_backward_pre)
+#endif
+
 #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
 L(large_forward):
 	/* Don't use non-temporal store if there is overlap between

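For illustration, here is a minimal C sketch (not the glibc code) of what the patch above does in assembly: once the destination is VEC_SIZE aligned, copy just enough leading vectors that the stores in the main 4x-vector loop land on cacheline boundaries.  VEC_SIZE == 32, copy_vec () and align_dst_to_cacheline () are names made up for this sketch.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define VEC_SIZE       32   /* e.g. one AVX2 vector */
#define CACHELINE_SIZE 64

/* Stand-in for one VMOVU load + VMOVA store.  */
static void
copy_vec (char *dst, const char *src)
{
  memcpy (dst, src, VEC_SIZE);
}

/* Precondition: *dst is already VEC_SIZE aligned, so its offset within a
   cacheline is 0, VEC_SIZE, 2 * VEC_SIZE or 3 * VEC_SIZE.  Copy 1, 2 or 3
   vectors so that *dst reaches the next cacheline boundary.  */
static void
align_dst_to_cacheline (char **dst, const char **src, size_t *len)
{
  size_t misalign = (uintptr_t) *dst & (CACHELINE_SIZE - 1);

  if (misalign != 0)
    {
      size_t head = CACHELINE_SIZE - misalign;
      for (size_t off = 0; off < head; off += VEC_SIZE)
	copy_vec (*dst + off, *src + off);
      *dst += head;
      *src += head;
      *len -= head;
    }
}
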
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8dd19b0b3ca334060eec990f0afa502700939ad3

commit 8dd19b0b3ca334060eec990f0afa502700939ad3
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun Apr 24 09:55:40 2016 -0700

    Use PREFETCHED_LOAD_SIZE in loop_4x_vec_xxx

diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index b5ffa49..0a2bf41 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -387,14 +387,14 @@ L(loop_4x_vec_forward):
 	VMOVU	VEC_SIZE(%rsi), %VEC(1)
 	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(2)
 	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(3)
-	addq	$(VEC_SIZE * 4), %rsi
-	subq	$(VEC_SIZE * 4), %rdx
+	addq	$(PREFETCHED_LOAD_SIZE), %rsi
+	subq	$(PREFETCHED_LOAD_SIZE), %rdx
 	VMOVA	%VEC(0), (%rdi)
 	VMOVA	%VEC(1), VEC_SIZE(%rdi)
 	VMOVA	%VEC(2), (VEC_SIZE * 2)(%rdi)
 	VMOVA	%VEC(3), (VEC_SIZE * 3)(%rdi)
-	addq	$(VEC_SIZE * 4), %rdi
-	cmpq	$(VEC_SIZE * 4), %rdx
+	addq	$(PREFETCHED_LOAD_SIZE), %rdi
+	cmpq	$(PREFETCHED_LOAD_SIZE), %rdx
 	ja	L(loop_4x_vec_forward)
 	/* Store the last 4 * VEC.  */
 	VMOVU	%VEC(5), (%rcx)
@@ -439,14 +439,14 @@ L(loop_4x_vec_backward):
 	VMOVU	-VEC_SIZE(%rcx), %VEC(1)
 	VMOVU	-(VEC_SIZE * 2)(%rcx), %VEC(2)
 	VMOVU	-(VEC_SIZE * 3)(%rcx), %VEC(3)
-	subq	$(VEC_SIZE * 4), %rcx
-	subq	$(VEC_SIZE * 4), %rdx
+	subq	$(PREFETCHED_LOAD_SIZE), %rcx
+	subq	$(PREFETCHED_LOAD_SIZE), %rdx
 	VMOVA	%VEC(0), (%r9)
 	VMOVA	%VEC(1), -VEC_SIZE(%r9)
 	VMOVA	%VEC(2), -(VEC_SIZE * 2)(%r9)
 	VMOVA	%VEC(3), -(VEC_SIZE * 3)(%r9)
-	subq	$(VEC_SIZE * 4), %r9
-	cmpq	$(VEC_SIZE * 4), %rdx
+	subq	$(PREFETCHED_LOAD_SIZE), %r9
+	cmpq	$(PREFETCHED_LOAD_SIZE), %rdx
 	ja	L(loop_4x_vec_backward)
 	/* Store the first 4 * VEC.  */
 	VMOVU	%VEC(4), (%rdi)

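This second patch is a cleanup: PREFETCHED_LOAD_SIZE is already defined as (VEC_SIZE * 4), so the loop's pointer and length updates keep the same values but are now tied to the macro naming the per-iteration block.  A rough C sketch of the loop's shape (not the glibc code; loop_4x_vec_forward here just reuses the label name, and the prefetches and the tail/overlap handling of the real code are omitted):

#include <stddef.h>
#include <string.h>

#define VEC_SIZE             32
#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)

/* Only entered with more than PREFETCHED_LOAD_SIZE bytes left, as in
   the .S code.  */
static void
loop_4x_vec_forward (char *dst, const char *src, size_t len)
{
  do
    {
      /* 4 vector loads + 4 vector stores per iteration.  */
      memcpy (dst, src, PREFETCHED_LOAD_SIZE);
      src += PREFETCHED_LOAD_SIZE;
      dst += PREFETCHED_LOAD_SIZE;
      len -= PREFETCHED_LOAD_SIZE;
    }
  while (len > PREFETCHED_LOAD_SIZE);
  /* The real code then stores the last 4 * VEC that it loaded before
     entering the loop.  */
}
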
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=13fd5ab017a053bbcfc7b73bd18d3b58c9078b8f

commit 13fd5ab017a053bbcfc7b73bd18d3b58c9078b8f
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun Apr 24 09:43:36 2016 -0700

    Rename PREFETCH_SIZE to CACHELINE_SIZE

diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 346d7a4..b5ffa49 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -71,33 +71,33 @@
 # define PREFETCH(addr) prefetcht0 addr
 #endif
 
-/* Assume 64-byte prefetch size.  */
-#ifndef PREFETCH_SIZE
-# define PREFETCH_SIZE 64
+/* Assume 64-byte cacheline/prefetch size.  */
+#ifndef CACHELINE_SIZE
+# define CACHELINE_SIZE 64
 #endif
 
 #define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)
 
-#if PREFETCH_SIZE == 64
-# if PREFETCHED_LOAD_SIZE == PREFETCH_SIZE
+#if CACHELINE_SIZE == 64
+# if PREFETCHED_LOAD_SIZE == CACHELINE_SIZE
 #  define PREFETCH_ONE_SET(dir, base, offset) \
 	PREFETCH ((offset)base)
-# elif PREFETCHED_LOAD_SIZE == 2 * PREFETCH_SIZE
+# elif PREFETCHED_LOAD_SIZE == 2 * CACHELINE_SIZE
 #  define PREFETCH_ONE_SET(dir, base, offset) \
 	PREFETCH ((offset)base); \
-	PREFETCH ((offset + dir * PREFETCH_SIZE)base)
-# elif PREFETCHED_LOAD_SIZE == 4 * PREFETCH_SIZE
+	PREFETCH ((offset + dir * CACHELINE_SIZE)base)
+# elif PREFETCHED_LOAD_SIZE == 4 * CACHELINE_SIZE
 #  define PREFETCH_ONE_SET(dir, base, offset) \
 	PREFETCH ((offset)base); \
-	PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
-	PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
-	PREFETCH ((offset + dir * PREFETCH_SIZE * 2)base); \
-	PREFETCH ((offset + dir * PREFETCH_SIZE * 3)base)
+	PREFETCH ((offset + dir * CACHELINE_SIZE)base); \
+	PREFETCH ((offset + dir * CACHELINE_SIZE)base); \
+	PREFETCH ((offset + dir * CACHELINE_SIZE * 2)base); \
+	PREFETCH ((offset + dir * CACHELINE_SIZE * 3)base)
 # else
 #   error Unsupported PREFETCHED_LOAD_SIZE!
 # endif
 #else
-# error Unsupported PREFETCH_SIZE!
+# error Unsupported CACHELINE_SIZE!
 #endif
 
 #ifndef SECTION

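The rename itself is mechanical, but the PREFETCH_ONE_SET macro being touched issues one prefetcht0 per cacheline that an iteration of the main loop will touch.  A hedged C approximation (not the glibc macro) of the PREFETCHED_LOAD_SIZE == 2 * CACHELINE_SIZE case that applies when VEC_SIZE is 32, using GCC's __builtin_prefetch in place of prefetcht0:

#define VEC_SIZE             32
#define CACHELINE_SIZE       64
#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)   /* == 2 * CACHELINE_SIZE */

/* dir is +1 in the forward loop and -1 in the backward loop, so the
   second prefetch always reaches into the next block to be copied.  */
static inline void
prefetch_one_set (int dir, const char *base, long offset)
{
  __builtin_prefetch (base + offset);
  __builtin_prefetch (base + offset + dir * CACHELINE_SIZE);
}
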
-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources

