Sourceware Bugzilla – Attachment 5264 Details for
Bug 12518
memcpy acts randomly (and differently) with overlapping areas
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Example patch to give the basic idea
patch.diff (text/plain), 3.78 KB, created by
Linus Torvalds
on 2011-02-25 02:26:20 UTC
(
hide
)
Description:
Example patch to give the basic idea
Filename:
patch.diff
MIME Type:
text/plain
Creator:
Linus Torvalds
Created:
2011-02-25 02:26:20 UTC
Size:
3.78 KB
patch
obsolete
> sysdeps/x86_64/multiarch/memcpy-ssse3.S | 48 +++++++++++++++--------------- > sysdeps/x86_64/multiarch/memmove-ssse3.S | 8 ++-- > 2 files changed, 28 insertions(+), 28 deletions(-) > >diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S >index 9a878d3..747d2ef 100644 >--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S >+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S >@@ -61,18 +61,15 @@ ENTRY (MEMCPY) > #ifdef USE_AS_MEMPCPY > add %rdx, %rax > #endif >- >-#ifdef USE_AS_MEMMOVE >- cmp %rsi, %rdi >- jb L(copy_forward) >- je L(write_0bytes) >- cmp $79, %rdx >- jbe L(copy_forward) >- jmp L(copy_backward) >-L(copy_forward): >-#endif >- cmp $79, %rdx >- lea L(table_less_80bytes)(%rip), %r11 >+ /* >+ * The small cases we can do without checking for any >+ * overlap at all, since we do them as all loads followed >+ * by all stores. >+ * >+ * So just jump through the less-than-80bytes table. >+ */ >+ cmp $79,%rdx >+ lea L(table_less_80bytes)(%rip), %r11 > ja L(80bytesormore) > movslq (%r11, %rdx, 4), %r9 > add %rdx, %rsi >@@ -81,13 +78,24 @@ L(copy_forward): > jmp *%r9 > ud2 > >- ALIGN (4) >+ /* >+ * For the 80+ byte cases we need to check overlap >+ */ > L(80bytesormore): >-#ifndef USE_AS_MEMMOVE >+ lea (%rsi,%rdx),%r9 >+ lea (%rdi,%rdx),%r11 >+ cmp %rdi,%r9 /* dest start >= source end */ >+ jae L(nonoverlap) /* -> nonoverlapping */ >+ cmp %rsi,%r11 /* source start >= destination end */ >+ jae L(nonoverlap) /* -> nonoverlapping */ >+ cmp %rsi, %rdi /* overlap: */ >+ jb L(copy_forward) /* source < dest: forward copy */ >+ je L(write_0bytes) /* source == dest: no copy */ >+ jmp L(copy_backward) /* source > dest: backward copy */ >+L(nonoverlap): > cmp %dil, %sil > jle L(copy_backward) >-#endif >- >+L(copy_forward): > movdqu (%rsi), %xmm0 > mov %rdi, %rcx > and $-16, %rdi >@@ -2805,7 +2813,6 @@ L(large_page_fwd): > movntdq %xmm1, (%rdi) > lea 16(%rdi), %rdi > lea -0x90(%rdx), %rdx >-#ifdef USE_AS_MEMMOVE > mov %rsi, %r9 > sub %rdi, %r9 > 
cmp %rdx, %r9 >@@ -2814,7 +2821,6 @@ L(large_page_fwd): > cmp %rcx, %rdx > jb L(ll_cache_copy_fwd_start) > L(memmove_is_memcpy_fwd): >-#endif > L(large_page_loop): > movdqu (%rsi), %xmm0 > movdqu 0x10(%rsi), %xmm1 >@@ -2859,7 +2865,6 @@ L(large_page_less_64bytes): > sfence > BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) > >-#ifdef USE_AS_MEMMOVE > ALIGN (4) > L(ll_cache_copy_fwd_start): > prefetcht0 0x1c0(%rsi) >@@ -2906,7 +2911,6 @@ L(large_page_ll_less_fwd_64bytes): > add %rdx, %rdi > BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) > >-#endif > ALIGN (4) > L(large_page_bwd): > movdqu -0x10(%rsi), %xmm1 >@@ -2915,7 +2919,6 @@ L(large_page_bwd): > movdqa %xmm1, -0x10(%rdi) > lea -16(%rdi), %rdi > lea -0x90(%rdx), %rdx >-#ifdef USE_AS_MEMMOVE > mov %rdi, %r9 > sub %rsi, %r9 > cmp %rdx, %r9 >@@ -2923,7 +2926,6 @@ L(large_page_bwd): > cmp %rcx, %r9 > jb L(ll_cache_copy_bwd_start) > L(memmove_is_memcpy_bwd): >-#endif > L(large_page_bwd_loop): > movdqu -0x10(%rsi), %xmm0 > movdqu -0x20(%rsi), %xmm1 >@@ -2966,7 +2968,6 @@ L(large_page_less_bwd_64bytes): > sfence > BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) > >-#ifdef USE_AS_MEMMOVE > ALIGN (4) > L(ll_cache_copy_bwd_start): > prefetcht0 -0x1c0(%rsi) >@@ -3010,7 +3011,6 @@ L(ll_cache_copy_bwd_start): > sub $0x40, %rdx > L(large_page_ll_less_bwd_64bytes): > BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) >-#endif > > END (MEMCPY) > >diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S >index 295430b..a38d445 100644 >--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S >+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S >@@ -1,4 +1,4 @@ >-#define USE_AS_MEMMOVE >-#define MEMCPY __memmove_ssse3 >-#define MEMCPY_CHK __memmove_chk_ssse3 >-#include "memcpy-ssse3.S" >+#include <sysdep.h> >+ >+strong_alias(__memmove_ssse3, __memcpy_ssse3); >+strong_alias(__memmove_chk_ssse3, __memcpy_chk_ssse3);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 12518
:
5264
|
5323
|
5341
|
5660