This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/pr19776/master created. glibc-2.23-73-g94a97fa
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 7 Mar 2016 16:05:34 -0000
- Subject: GNU C Library master sources branch hjl/pr19776/master created. glibc-2.23-73-g94a97fa
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/pr19776/master has been created
at 94a97fa99c9fc6bb1a579aabbde4509b9cb746b8 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=94a97fa99c9fc6bb1a579aabbde4509b9cb746b8
commit 94a97fa99c9fc6bb1a579aabbde4509b9cb746b8
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Mar 7 05:47:26 2016 -0800
Enable __memcpy_chk_sse2_unaligned
Check Fast_Unaligned_Load for __memcpy_chk_sse2_unaligned
* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Check
Fast_Unaligned_Load to enable __memcpy_chk_sse2_unaligned.
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 648217e..c009211 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -32,22 +32,25 @@ ENTRY(__memcpy_chk)
LOAD_RTLD_GLOBAL_RO_RDX
#ifdef HAVE_AVX512_ASM_SUPPORT
HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
+ jz 1f
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 1f
- leaq __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
+ jz 1f
+ lea __memcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
ret
#endif
-1: leaq __memcpy_chk_sse2(%rip), %rax
+1: lea __memcpy_chk_avx_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+ jnz 2f
+ lea __memcpy_chk_sse2_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ lea __memcpy_chk_sse2(%rip), %RAX_LP
HAS_CPU_FEATURE (SSSE3)
jz 2f
- leaq __memcpy_chk_ssse3(%rip), %rax
+ lea __memcpy_chk_ssse3_back(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __memcpy_chk_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __memcpy_chk_avx_unaligned(%rip), %rax
+ jnz 2f
+ lea __memcpy_chk_ssse3(%rip), %RAX_LP
2: ret
END(__memcpy_chk)
# else
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=164b82bb51a39bdfb21478b4ca28909fb265bdd1
commit 164b82bb51a39bdfb21478b4ca28909fb265bdd1
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Mar 7 05:44:58 2016 -0800
Enable __mempcpy_chk_sse2_unaligned
Check Fast_Unaligned_Load for __mempcpy_chk_sse2_unaligned
* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk): Check
Fast_Unaligned_Load to enable __mempcpy_chk_sse2_unaligned.
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 6e8a89d..bec37bc 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -35,19 +35,22 @@ ENTRY(__mempcpy_chk)
jz 1f
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
jz 1f
- leaq __mempcpy_chk_avx512_no_vzeroupper(%rip), %rax
+ lea __mempcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
ret
#endif
-1: leaq __mempcpy_chk_sse2(%rip), %rax
+1: lea __mempcpy_chk_avx_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+ jnz 2f
+ lea __mempcpy_chk_sse2_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ lea __mempcpy_chk_sse2(%rip), %RAX_LP
HAS_CPU_FEATURE (SSSE3)
jz 2f
- leaq __mempcpy_chk_ssse3(%rip), %rax
+ lea __mempcpy_chk_ssse3_back(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __mempcpy_chk_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __mempcpy_chk_avx_unaligned(%rip), %rax
+ jnz 2f
+ lea __mempcpy_chk_ssse3(%rip), %RAX_LP
2: ret
END(__mempcpy_chk)
# else
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=681c975ae1db45f1f184e52172bb5288f84a184d
commit 681c975ae1db45f1f184e52172bb5288f84a184d
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Mar 7 05:42:46 2016 -0800
Enable __mempcpy_sse2_unaligned
Check Fast_Unaligned_Load for __mempcpy_sse2_unaligned.
* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Check
Fast_Unaligned_Load to enable __mempcpy_sse2_unaligned.
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index ed78623..1314d76 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -33,19 +33,22 @@ ENTRY(__mempcpy)
jz 1f
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
jz 1f
- leaq __mempcpy_avx512_no_vzeroupper(%rip), %rax
+ lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
ret
#endif
-1: leaq __mempcpy_sse2(%rip), %rax
+1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+ jnz 2f
+ lea __mempcpy_sse2_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ lea __mempcpy_sse2(%rip), %RAX_LP
HAS_CPU_FEATURE (SSSE3)
jz 2f
- leaq __mempcpy_ssse3(%rip), %rax
+ lea __mempcpy_ssse3_back(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __mempcpy_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __mempcpy_avx_unaligned(%rip), %rax
+ jnz 2f
+ lea __mempcpy_ssse3(%rip), %RAX_LP
2: ret
END(__mempcpy)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9452ba101b95ec3a20074b1cc8fd2d84b6e0e859
commit 9452ba101b95ec3a20074b1cc8fd2d84b6e0e859
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 17:06:41 2016 -0800
Add __mempcpy_sse2_unaligned and _chk functions
Add __mempcpy_chk_sse2_unaligned, __mempcpy_sse2_unaligned and
__memcpy_chk_sse2_unaligned.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Test __memcpy_chk_sse2_unaligned,
__mempcpy_chk_sse2_unaligned and __mempcpy_sse2_unaligned.
* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
(__mempcpy_chk_sse2_unaligned): New.
(__mempcpy_sse2_unaligned): Likewise.
(__memcpy_chk_sse2_unaligned): Likewise.
(L(start): New label.
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 188b6d3..47ca468 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -278,6 +278,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+ __memcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
__memcpy_chk_sse2))
/* Support sysdeps/x86_64/multiarch/memcpy.S. */
@@ -314,6 +316,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
__mempcpy_chk_sse2))
/* Support sysdeps/x86_64/multiarch/mempcpy.S. */
@@ -330,6 +334,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, mempcpy, 1,
+ __mempcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index 335a498..947c50f 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -22,9 +22,29 @@
#include "asm-syntax.h"
+# ifdef SHARED
+ENTRY (__mempcpy_chk_sse2_unaligned)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_sse2_unaligned)
+# endif
+
+ENTRY (__mempcpy_sse2_unaligned)
+ mov %rdi, %rax
+ add %rdx, %rax
+ jmp L(start)
+END (__mempcpy_sse2_unaligned)
+
+# ifdef SHARED
+ENTRY (__memcpy_chk_sse2_unaligned)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memcpy_chk_sse2_unaligned)
+# endif
ENTRY(__memcpy_sse2_unaligned)
movq %rdi, %rax
+L(start):
testq %rdx, %rdx
je L(return)
cmpq $16, %rdx
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b812ca4864339d011edde57a86b27b80748bbfda
commit b812ca4864339d011edde57a86b27b80748bbfda
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 16:52:53 2016 -0800
Remove L(overlapping) from memcpy-sse2-unaligned.S
Since memcpy doesn't need to check overlapping source and destination,
we can remove L(overlapping).
[BZ #19776]
* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
(L(overlapping)): Removed.
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index 19d8aa6..335a498 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -25,12 +25,8 @@
ENTRY(__memcpy_sse2_unaligned)
movq %rdi, %rax
- movq %rsi, %r11
- leaq (%rdx,%rdx), %rcx
- subq %rdi, %r11
- subq %rdx, %r11
- cmpq %rcx, %r11
- jb L(overlapping)
+ testq %rdx, %rdx
+ je L(return)
cmpq $16, %rdx
jbe L(less_16)
movdqu (%rsi), %xmm8
@@ -89,45 +85,6 @@ L(loop):
cmpq %rcx, %rdx
jne L(loop)
ret
-L(overlapping):
- testq %rdx, %rdx
- .p2align 4,,5
- je L(return)
- movq %rdx, %r9
- leaq 16(%rsi), %rcx
- leaq 16(%rdi), %r8
- shrq $4, %r9
- movq %r9, %r11
- salq $4, %r11
- cmpq %rcx, %rdi
- setae %cl
- cmpq %r8, %rsi
- setae %r8b
- orl %r8d, %ecx
- cmpq $15, %rdx
- seta %r8b
- testb %r8b, %cl
- je .L21
- testq %r11, %r11
- je .L21
- xorl %ecx, %ecx
- xorl %r8d, %r8d
-.L7:
- movdqu (%rsi,%rcx), %xmm8
- addq $1, %r8
- movdqu %xmm8, (%rdi,%rcx)
- addq $16, %rcx
- cmpq %r8, %r9
- ja .L7
- cmpq %r11, %rdx
- je L(return)
-.L21:
- movzbl (%rsi,%r11), %ecx
- movb %cl, (%rdi,%r11)
- addq $1, %r11
- cmpq %r11, %rdx
- ja .L21
- ret
L(less_16):
testb $24, %dl
jne L(between_9_16)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b74a495c76c05e9685482322a6a30d12a98a8a44
commit b74a495c76c05e9685482322a6a30d12a98a8a44
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 13:46:54 2016 -0800
Don't use RAX as scratch register
To prepare sharing code with mempcpy, don't use RAX as scratch register
so that RAX can be set to the return value at entrance.
[BZ #19776]
* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Don't use
RAX as scratch register.
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index 7207753..19d8aa6 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -24,11 +24,12 @@
ENTRY(__memcpy_sse2_unaligned)
- movq %rsi, %rax
+ movq %rdi, %rax
+ movq %rsi, %r11
leaq (%rdx,%rdx), %rcx
- subq %rdi, %rax
- subq %rdx, %rax
- cmpq %rcx, %rax
+ subq %rdi, %r11
+ subq %rdx, %r11
+ cmpq %rcx, %r11
jb L(overlapping)
cmpq $16, %rdx
jbe L(less_16)
@@ -39,7 +40,6 @@ ENTRY(__memcpy_sse2_unaligned)
movdqu %xmm8, -16(%rdi,%rdx)
ja .L31
L(return):
- movq %rdi, %rax
ret
.p2align 4,,10
.p2align 4
@@ -64,16 +64,16 @@ L(return):
addq %rdi, %rdx
andq $-64, %rdx
andq $-64, %rcx
- movq %rcx, %rax
- subq %rdi, %rax
- addq %rax, %rsi
+ movq %rcx, %r11
+ subq %rdi, %r11
+ addq %r11, %rsi
cmpq %rdx, %rcx
je L(return)
movq %rsi, %r10
subq %rcx, %r10
leaq 16(%r10), %r9
leaq 32(%r10), %r8
- leaq 48(%r10), %rax
+ leaq 48(%r10), %r11
.p2align 4,,10
.p2align 4
L(loop):
@@ -83,12 +83,12 @@ L(loop):
movdqa %xmm8, 16(%rcx)
movdqu (%rcx,%r8), %xmm8
movdqa %xmm8, 32(%rcx)
- movdqu (%rcx,%rax), %xmm8
+ movdqu (%rcx,%r11), %xmm8
movdqa %xmm8, 48(%rcx)
addq $64, %rcx
cmpq %rcx, %rdx
jne L(loop)
- jmp L(return)
+ ret
L(overlapping):
testq %rdx, %rdx
.p2align 4,,5
@@ -97,8 +97,8 @@ L(overlapping):
leaq 16(%rsi), %rcx
leaq 16(%rdi), %r8
shrq $4, %r9
- movq %r9, %rax
- salq $4, %rax
+ movq %r9, %r11
+ salq $4, %r11
cmpq %rcx, %rdi
setae %cl
cmpq %r8, %rsi
@@ -107,9 +107,9 @@ L(overlapping):
cmpq $15, %rdx
seta %r8b
testb %r8b, %cl
- je .L16
- testq %rax, %rax
- je .L16
+ je .L21
+ testq %r11, %r11
+ je .L21
xorl %ecx, %ecx
xorl %r8d, %r8d
.L7:
@@ -119,15 +119,15 @@ L(overlapping):
addq $16, %rcx
cmpq %r8, %r9
ja .L7
- cmpq %rax, %rdx
+ cmpq %r11, %rdx
je L(return)
.L21:
- movzbl (%rsi,%rax), %ecx
- movb %cl, (%rdi,%rax)
- addq $1, %rax
- cmpq %rax, %rdx
+ movzbl (%rsi,%r11), %ecx
+ movb %cl, (%rdi,%r11)
+ addq $1, %r11
+ cmpq %r11, %rdx
ja .L21
- jmp L(return)
+ ret
L(less_16):
testb $24, %dl
jne L(between_9_16)
@@ -137,28 +137,25 @@ L(less_16):
testq %rdx, %rdx
.p2align 4,,2
je L(return)
- movzbl (%rsi), %eax
+ movzbl (%rsi), %ecx
testb $2, %dl
- movb %al, (%rdi)
+ movb %cl, (%rdi)
je L(return)
- movzwl -2(%rsi,%rdx), %eax
- movw %ax, -2(%rdi,%rdx)
- jmp L(return)
+ movzwl -2(%rsi,%rdx), %ecx
+ movw %cx, -2(%rdi,%rdx)
+ ret
L(between_9_16):
- movq (%rsi), %rax
- movq %rax, (%rdi)
- movq -8(%rsi,%rdx), %rax
- movq %rax, -8(%rdi,%rdx)
- jmp L(return)
-.L16:
- xorl %eax, %eax
- jmp .L21
+ movq (%rsi), %rcx
+ movq %rcx, (%rdi)
+ movq -8(%rsi,%rdx), %rcx
+ movq %rcx, -8(%rdi,%rdx)
+ ret
L(between_5_8):
- movl (%rsi), %eax
- movl %eax, (%rdi)
- movl -4(%rsi,%rdx), %eax
- movl %eax, -4(%rdi,%rdx)
- jmp L(return)
+ movl (%rsi), %ecx
+ movl %ecx, (%rdi)
+ movl -4(%rsi,%rdx), %ecx
+ movl %ecx, -4(%rdi,%rdx)
+ ret
END(__memcpy_sse2_unaligned)
#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ab3ae148820f5023254a4979d6eef60537f0bb03
commit ab3ae148820f5023254a4979d6eef60537f0bb03
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 14:16:32 2016 -0800
Remove dead code from memcpy-sse2-unaligned.S
There are
ENTRY(__memcpy_sse2_unaligned)
movq %rsi, %rax
leaq (%rdx,%rdx), %rcx
subq %rdi, %rax
subq %rdx, %rax
cmpq %rcx, %rax
jb L(overlapping)
When branch is taken,
cmpq %rsi, %rdi
jae .L3
will never be taken. We can remove the dead code.
[BZ #19776]
* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S (.L3) Removed.
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index c450983..7207753 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -90,8 +90,6 @@ L(loop):
jne L(loop)
jmp L(return)
L(overlapping):
- cmpq %rsi, %rdi
- jae .L3
testq %rdx, %rdx
.p2align 4,,5
je L(return)
@@ -146,15 +144,6 @@ L(less_16):
movzwl -2(%rsi,%rdx), %eax
movw %ax, -2(%rdi,%rdx)
jmp L(return)
-.L3:
- leaq -1(%rdx), %rax
- .p2align 4,,10
- .p2align 4
-.L11:
- movzbl (%rsi,%rax), %edx
- movb %dl, (%rdi,%rax)
- subq $1, %rax
- jmp .L11
L(between_9_16):
movq (%rsi), %rax
movq %rax, (%rdi)
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources