This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/cacheline/master created. glibc-2.23-235-g9cdb7e7
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 25 Apr 2016 12:02:04 -0000
- Subject: GNU C Library master sources branch hjl/cacheline/master created. glibc-2.23-235-g9cdb7e7
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/cacheline/master, has been created
at 9cdb7e72b694274d76fce00a23b87efde2e0d28b (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9cdb7e72b694274d76fce00a23b87efde2e0d28b
commit 9cdb7e72b694274d76fce00a23b87efde2e0d28b
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 24 10:53:25 2016 -0700
Align to cacheline
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 0a2bf41..c140cb3 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -376,6 +376,27 @@ L(more_8x_vec):
subq %r8, %rdi
/* Adjust length. */
addq %r8, %rdx
+#if CACHELINE_SIZE != VEC_SIZE
+ movl %edi, %r8d
+ andl $(CACHELINE_SIZE - 1), %r8d
+ je L(loop_4x_vec_forward_pre)
+# if CACHELINE_SIZE == (VEC_SIZE * 4)
+ /* Cacheline misaligned by VEC_SIZE, 2 * VEC_SIZE, or
+ 3 * VEC_SIZE. */
+ cmpl $(VEC_SIZE * 2), %r8d
+ je L(misaligned_by_2x_vec_forward)
+ jb L(misaligned_by_3x_vec_forward)
+# elif CACHELINE_SIZE != (VEC_SIZE * 2)
+# error Unsupported CACHELINE_SIZE!
+# endif
+ /* Cacheline misaligned by VEC_SIZE. */
+ VMOVU (%rsi), %VEC(0)
+ addq $VEC_SIZE, %rsi
+ subq $VEC_SIZE, %rdx
+ VMOVA %VEC(0), (%rdi)
+ addq $VEC_SIZE, %rdi
+#endif
+L(loop_4x_vec_forward_pre):
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
/* Check non-temporal store threshold. */
cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
@@ -406,6 +427,32 @@ L(loop_4x_vec_forward):
VZEROUPPER
ret
+#if CACHELINE_SIZE == (VEC_SIZE * 4)
+L(misaligned_by_2x_vec_forward):
+ /* Cacheline misaligned by 2 * VEC_SIZE. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ addq $(VEC_SIZE * 2), %rsi
+ subq $(VEC_SIZE * 2), %rdx
+ VMOVA %VEC(0), (%rdi)
+ VMOVA %VEC(1), VEC_SIZE(%rdi)
+ addq $(VEC_SIZE * 2), %rdi
+ jmp L(loop_4x_vec_forward_pre)
+
+L(misaligned_by_3x_vec_forward):
+ /* Cacheline misaligned by 3 * VEC_SIZE. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ addq $(VEC_SIZE * 3), %rsi
+ subq $(VEC_SIZE * 3), %rdx
+ VMOVA %VEC(0), (%rdi)
+ VMOVA %VEC(1), VEC_SIZE(%rdi)
+ VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi)
+ addq $(VEC_SIZE * 3), %rdi
+ jmp L(loop_4x_vec_forward_pre)
+#endif
+
L(more_8x_vec_backward):
/* Load the first 4 * VEC and last VEC to support overlapping
addresses. */
@@ -428,6 +475,27 @@ L(more_8x_vec_backward):
subq %r8, %r9
/* Adjust length. */
subq %r8, %rdx
+#if CACHELINE_SIZE != VEC_SIZE
+ movl %r9d, %r8d
+ andl $(CACHELINE_SIZE - 1), %r8d
+ je L(loop_4x_vec_backward_pre)
+# if CACHELINE_SIZE == (VEC_SIZE * 4)
+ /* Cacheline misaligned by VEC_SIZE, 2 * VEC_SIZE, or
+ 3 * VEC_SIZE. */
+ cmpl $(VEC_SIZE * 2), %r8d
+ je L(misaligned_by_2x_vec_backward)
+ jb L(misaligned_by_3x_vec_backward)
+# elif CACHELINE_SIZE != (VEC_SIZE * 2)
+# error Unsupported CACHELINE_SIZE!
+# endif
+ /* Cacheline misaligned by VEC_SIZE. */
+ VMOVU (%rcx), %VEC(0)
+ subq $VEC_SIZE, %rcx
+ subq $VEC_SIZE, %rdx
+ VMOVA %VEC(0), (%r9)
+ subq $VEC_SIZE, %r9
+#endif
+L(loop_4x_vec_backward_pre):
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
/* Check non-temporal store threshold. */
cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
@@ -458,6 +526,32 @@ L(loop_4x_vec_backward):
VZEROUPPER
ret
+#if CACHELINE_SIZE == (VEC_SIZE * 4)
+L(misaligned_by_2x_vec_backward):
+ /* Cacheline misaligned by 2 * VEC_SIZE. */
+ VMOVU (%rcx), %VEC(0)
+ VMOVU -VEC_SIZE(%rcx), %VEC(1)
+ subq $(VEC_SIZE * 2), %rcx
+ subq $(VEC_SIZE * 2), %rdx
+ VMOVA %VEC(0), (%r9)
+ VMOVA %VEC(1), -VEC_SIZE(%r9)
+ subq $(VEC_SIZE * 2), %r9
+ jmp L(loop_4x_vec_backward_pre)
+
+L(misaligned_by_3x_vec_backward):
+ /* Cacheline misaligned by 3 * VEC_SIZE. */
+ VMOVU (%rcx), %VEC(0)
+ VMOVU -VEC_SIZE(%rcx), %VEC(1)
+ VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
+ subq $(VEC_SIZE * 3), %rcx
+ subq $(VEC_SIZE * 3), %rdx
+ VMOVA %VEC(0), (%r9)
+ VMOVA %VEC(1), -VEC_SIZE(%r9)
+ VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9)
+ subq $(VEC_SIZE * 3), %r9
+ jmp L(loop_4x_vec_backward_pre)
+#endif
+
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
L(large_forward):
/* Don't use non-temporal store if there is overlap between
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8dd19b0b3ca334060eec990f0afa502700939ad3
commit 8dd19b0b3ca334060eec990f0afa502700939ad3
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 24 09:55:40 2016 -0700
Use PREFETCHED_LOAD_SIZE in loop_4x_vec_xxx
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index b5ffa49..0a2bf41 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -387,14 +387,14 @@ L(loop_4x_vec_forward):
VMOVU VEC_SIZE(%rsi), %VEC(1)
VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- addq $(VEC_SIZE * 4), %rsi
- subq $(VEC_SIZE * 4), %rdx
+ addq $(PREFETCHED_LOAD_SIZE), %rsi
+ subq $(PREFETCHED_LOAD_SIZE), %rdx
VMOVA %VEC(0), (%rdi)
VMOVA %VEC(1), VEC_SIZE(%rdi)
VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi)
VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi)
- addq $(VEC_SIZE * 4), %rdi
- cmpq $(VEC_SIZE * 4), %rdx
+ addq $(PREFETCHED_LOAD_SIZE), %rdi
+ cmpq $(PREFETCHED_LOAD_SIZE), %rdx
ja L(loop_4x_vec_forward)
/* Store the last 4 * VEC. */
VMOVU %VEC(5), (%rcx)
@@ -439,14 +439,14 @@ L(loop_4x_vec_backward):
VMOVU -VEC_SIZE(%rcx), %VEC(1)
VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
- subq $(VEC_SIZE * 4), %rcx
- subq $(VEC_SIZE * 4), %rdx
+ subq $(PREFETCHED_LOAD_SIZE), %rcx
+ subq $(PREFETCHED_LOAD_SIZE), %rdx
VMOVA %VEC(0), (%r9)
VMOVA %VEC(1), -VEC_SIZE(%r9)
VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9)
VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9)
- subq $(VEC_SIZE * 4), %r9
- cmpq $(VEC_SIZE * 4), %rdx
+ subq $(PREFETCHED_LOAD_SIZE), %r9
+ cmpq $(PREFETCHED_LOAD_SIZE), %rdx
ja L(loop_4x_vec_backward)
/* Store the first 4 * VEC. */
VMOVU %VEC(4), (%rdi)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=13fd5ab017a053bbcfc7b73bd18d3b58c9078b8f
commit 13fd5ab017a053bbcfc7b73bd18d3b58c9078b8f
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Apr 24 09:43:36 2016 -0700
Rename PREFETCH_SIZE to CACHELINE_SIZE
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 346d7a4..b5ffa49 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -71,33 +71,33 @@
# define PREFETCH(addr) prefetcht0 addr
#endif
-/* Assume 64-byte prefetch size. */
-#ifndef PREFETCH_SIZE
-# define PREFETCH_SIZE 64
+/* Assume 64-byte cacheline/prefetch size. */
+#ifndef CACHELINE_SIZE
+# define CACHELINE_SIZE 64
#endif
#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)
-#if PREFETCH_SIZE == 64
-# if PREFETCHED_LOAD_SIZE == PREFETCH_SIZE
+#if CACHELINE_SIZE == 64
+# if PREFETCHED_LOAD_SIZE == CACHELINE_SIZE
# define PREFETCH_ONE_SET(dir, base, offset) \
PREFETCH ((offset)base)
-# elif PREFETCHED_LOAD_SIZE == 2 * PREFETCH_SIZE
+# elif PREFETCHED_LOAD_SIZE == 2 * CACHELINE_SIZE
# define PREFETCH_ONE_SET(dir, base, offset) \
PREFETCH ((offset)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base)
-# elif PREFETCHED_LOAD_SIZE == 4 * PREFETCH_SIZE
+ PREFETCH ((offset + dir * CACHELINE_SIZE)base)
+# elif PREFETCHED_LOAD_SIZE == 4 * CACHELINE_SIZE
# define PREFETCH_ONE_SET(dir, base, offset) \
PREFETCH ((offset)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE * 2)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE * 3)base)
+ PREFETCH ((offset + dir * CACHELINE_SIZE)base); \
+ PREFETCH ((offset + dir * CACHELINE_SIZE)base); \
+ PREFETCH ((offset + dir * CACHELINE_SIZE * 2)base); \
+ PREFETCH ((offset + dir * CACHELINE_SIZE * 3)base)
# else
# error Unsupported PREFETCHED_LOAD_SIZE!
# endif
#else
-# error Unsupported PREFETCH_SIZE!
+# error Unsupported CACHELINE_SIZE!
#endif
#ifndef SECTION
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources