This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
GNU C Library master sources branch ldmitrie/intel_mpx created. glibc-2.18-87-g1aef5b2
- From: ldmitrie at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 4 Sep 2013 14:13:21 -0000
- Subject: GNU C Library master sources branch ldmitrie/intel_mpx created. glibc-2.18-87-g1aef5b2
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, ldmitrie/intel_mpx has been created
at 1aef5b2564676933dbc4eafa0c35c00792e47595 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1aef5b2564676933dbc4eafa0c35c00792e47595
commit 1aef5b2564676933dbc4eafa0c35c00792e47595
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Fri Aug 30 18:37:28 2013 +0400
Implemented bound check support for string/memory routines for x86_64.
TODO: Fix bound check support in strcmp-sse2 and implement in strspn, strstr and strcspn.
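The pattern applied throughout the diff below is the same in every routine: before a buffer is touched, bndcl verifies that the first accessed byte is not below the lower bound held in %bnd0/%bnd1, and bndcu verifies that the last accessed byte is not above the upper bound; a violation raises a #BR exception. A minimal C sketch of the equivalent check, assuming a GCC toolchain with MPX/Pointer Bounds Checker support (-fcheck-pointer-bounds -mmpx); the helper name is hypothetical and not part of the patch:

/* Hypothetical illustration of the "bndcl (%rdi), %bnd0;
   bndcu -1(%rdi,%rdx), %bnd0" sequences added in the assembly below.  */
#include <stddef.h>

static inline void
mpx_check_buffer (const void *buf, size_t n)
{
  if (n == 0)
    return;			/* the patch skips the checks when n == 0 */
  /* #BR if BUF lies below the lower bound attached to the pointer.  */
  __builtin___bnd_chk_ptr_lbounds (buf);
  /* #BR if the last accessed byte lies above the upper bound.  */
  __builtin___bnd_chk_ptr_ubounds ((const char *) buf + n - 1);
}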
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 08db331..db6838d 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -18,6 +18,9 @@ endif
ifeq ($(subdir),string)
sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
gen-as-const-headers += locale-defines.sym
+ifeq ($(enable-mpx), yes)
+sysdep_routines += strcpy_chk-c stpcpy_chk-c
+endif
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions
index a437f85..083770a 100644
--- a/sysdeps/x86_64/Versions
+++ b/sysdeps/x86_64/Versions
@@ -2,6 +2,13 @@ libc {
GLIBC_2.14 {
memcpy;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ mpx_memset_nobnd;
+ mpx_memset_nochk;
+ mpx_memset_nobnd_nochk;
+ }
+%endif
}
libm {
GLIBC_2.1 {
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 891ee70..205345b 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -20,8 +20,17 @@
/* fast SSE2 version with using pmaxub and 64 byte loop */
+# ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
.text
ENTRY(memchr)
+
movd %rsi, %xmm1
mov %rdi, %rcx
@@ -33,6 +42,10 @@ ENTRY(memchr)
and $63, %rcx
pshufd $0, %xmm1, %xmm1
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmp $48, %rcx
ja L(crosscache)
@@ -72,7 +85,7 @@ L(crosscache):
jbe L(return_null)
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -85,24 +98,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -116,24 +141,36 @@ L(loop_prolog):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -151,6 +188,9 @@ L(loop_prolog):
L(align64_loop):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -192,25 +232,34 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(exit_loop):
add $32, %rdx
jle L(exit_loop_32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -219,6 +268,9 @@ L(exit_loop):
sub $16, %rdx
jle L(return_null)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
pcmpeqb 48(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -229,6 +281,9 @@ L(exit_loop):
.p2align 4
L(exit_loop_32):
add $32, %rdx
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
@@ -237,6 +292,9 @@ L(exit_loop_32):
sub $16, %rdx
jbe L(return_null)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
pcmpeqb 16(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -248,25 +306,25 @@ L(exit_loop_32):
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches_1):
@@ -274,7 +332,7 @@ L(matches_1):
sub %rax, %rdx
jbe L(return_null)
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16_1):
@@ -282,7 +340,7 @@ L(matches16_1):
sub %rax, %rdx
jbe L(return_null)
lea 16(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches32_1):
@@ -290,7 +348,7 @@ L(matches32_1):
sub %rax, %rdx
jbe L(return_null)
lea 32(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches48_1):
@@ -298,7 +356,7 @@ L(matches48_1):
sub %rax, %rdx
jbe L(return_null)
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index d5c072c..77a7bca 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -23,6 +23,11 @@
ENTRY (memcmp)
test %rdx, %rdx
jz L(finz)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+ pxor %xmm0, %xmm0
cmpq $1, %rdx
jle L(finr1b)
subq %rdi, %rsi
@@ -86,6 +91,10 @@ L(s16b):
.p2align 4,, 4
L(finr1b):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
movzbl (%rdi), %eax
movzbl (%rsi), %edx
L(finz1):
@@ -132,6 +141,10 @@ L(gt32):
andq $15, %r8
jz L(16am)
/* Both pointers may be misaligned. */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi), %xmm1
movdqu (%rdi, %rsi), %xmm0
pcmpeqb %xmm0, %xmm1
@@ -146,6 +159,10 @@ L(16am):
jz L(ATR)
testq $16, %rdi
jz L(A32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi, %rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -160,6 +177,10 @@ L(A32):
/* Pre-unroll to be ready for unrolled 64B loop. */
testq $32, %rdi
jz L(A64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -167,6 +188,10 @@ L(A32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -181,6 +206,10 @@ L(A64):
jge L(mt32)
L(A64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -188,6 +217,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -195,6 +228,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -202,6 +239,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -219,6 +260,10 @@ L(mt32):
jge L(mt16)
L(A32main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -226,6 +271,10 @@ L(A32main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -258,6 +307,10 @@ L(ATR):
testq $16, %rdi
jz L(ATR32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -273,6 +326,10 @@ L(ATR32):
testq $32, %rdi
jz L(ATR64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -280,6 +337,10 @@ L(ATR32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -292,6 +353,10 @@ L(ATR64):
je L(mt32)
L(ATR64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -299,6 +364,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -306,6 +375,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -313,6 +386,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -328,6 +405,10 @@ L(ATR64main):
jge L(mt16)
L(ATR32res):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -335,6 +416,10 @@ L(ATR32res):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 5a659fe..3afa97c 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -27,6 +27,11 @@ ENTRY (memrchr)
sub $16, %rdx
jbe L(length_less16)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -284,6 +289,10 @@ L(length_less16_offset0):
test %edx, %edx
jz L(return_null)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %dl, %cl
pcmpeqb (%rdi), %xmm1
@@ -314,6 +323,10 @@ L(length_less16):
and $15, %rcx
jz L(length_less16_offset0)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %rdi, %rcx
and $15, %rcx
mov %cl, %dh
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6c69f4b..4e1bb84 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -26,6 +26,15 @@
.text
#if !defined NOT_IN_libc
ENTRY(__bzero)
+ testq %rsi, %rsi
+ jz L(only_return)
+
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rsi), %bnd0
+# endif
+
+ mov %rdi, %rax
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
pxor %xmm8, %xmm8
@@ -53,7 +62,20 @@ ENTRY_CHK (__memset_chk)
END_CHK (__memset_chk)
#endif
+#ifdef __CHKP__
+ENTRY (mpx_memset_nochk)
+ jmp L(entry_from_mpx_memset_nochk)
+END (mpx_memset_nochk)
+#endif
+
ENTRY (memset)
+ testq %rdx, %rdx
+ jz L(only_return)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+L(entry_from_mpx_memset_nochk):
+#endif
movd %esi, %xmm8
movq %rdi, %rax
punpcklbw %xmm8, %xmm8
@@ -71,6 +93,9 @@ L(entry_from_bzero):
L(return):
rep
ret
+L(only_return):
+ movq %rdi, %rax
+ ret
ALIGN (4)
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
@@ -129,6 +154,11 @@ L(between8_16bytes):
END (memset)
libc_hidden_builtin_def (memset)
+#ifdef __CHKP__
+weak_alias (memset, mpx_memset_nobnd)
+weak_alias (mpx_memset_nochk, mpx_memset_nobnd_nochk)
+#endif
+
#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 203d16e..490950e 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -26,6 +26,15 @@ CFLAGS-strstr.c += -msse4
CFLAGS-strcasestr.c += -msse4
CFLAGS-strcasestr-nonascii.c += -msse4
endif
+
+ifeq ($(enable-mpx), yes)
+sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \
+ memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c
+ASFLAGS-memcpy-ssse3-back-1.S += -fno-mpx
+ASFLAGS-mempcpy-ssse3-back-1.S += -fno-mpx
+ASFLAGS-memmove-ssse3-back-1.S += -fno-mpx
+endif
+
endif
ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
index 59b185a..fa1cf0a 100644
--- a/sysdeps/x86_64/multiarch/Versions
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -2,4 +2,17 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ mpx_memcpy_nobnd_nochk;
+ mpx_memmove_nobnd_nochk;
+ mpx_mempcpy_nobnd_nochk;
+ mpx_memcpy_nochk;
+ mpx_memmove_nochk;
+ mpx_mempcpy_nochk;
+ }
+%endif
}
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 639f02b..9809d47 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,10 @@
.text
ENTRY(bcopy)
xchg %rdi, %rsi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
jmp __libc_memmove /* Branch to IFUNC memmove. */
END(bcopy)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d0992e1..e3a4163 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
+#ifndef __CHKP__
+  /* We use a specific version when glibc is built with Intel MPX.  */
/* Support sysdeps/x86_64/multiarch/memmove_chk.S. */
IFUNC_IMPL (i, name, __memmove_chk,
IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
@@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
#ifdef SHARED
+#ifndef __CHKP__
+  /* We use specific versions of memcpy, memcpy_chk and mempcpy when Intel MPX is enabled.  */
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
IFUNC_IMPL (i, name, __memcpy_chk,
IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
@@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
__mempcpy_ssse3)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200..b5c6675 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -48,6 +48,13 @@ ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
shl $2, %rdx
# endif
+# ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+L(NoEntryCheck):
+# endif
pxor %xmm0, %xmm0
cmp $79, %rdx
ja L(79bytesormore)
@@ -70,6 +77,10 @@ L(firstbyte):
ALIGN (4)
L(79bytesormore):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
pxor %xmm1, %xmm2
@@ -90,21 +101,37 @@ L(79bytesormore):
L(less128bytes):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -112,11 +139,19 @@ L(less128bytes):
cmp $32, %rdx
jb L(less32bytesin64)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -139,41 +174,73 @@ L(128bytesormore):
L(less256bytes):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -188,11 +255,19 @@ L(less256bytes):
cmp $32, %rdx
jb L(less32bytesin128)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -207,81 +282,145 @@ L(less32bytesin128):
L(less512bytes):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqu 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqu 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqu 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqu 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqu 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqu 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqu 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqu 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -299,11 +438,19 @@ L(less512bytes):
cmp $32, %rdx
jb L(less32bytesin256)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -331,18 +478,34 @@ L(512bytesormore):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loop):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined):
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -403,21 +582,37 @@ L(2aligned):
L(less128bytesin2aligned):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -425,11 +620,19 @@ L(less128bytesin2aligned):
cmp $32, %rdx
jb L(less32bytesin64in2alinged)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -453,41 +656,73 @@ L(128bytesormorein2aligned):
L(less256bytesin2alinged):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -502,11 +737,19 @@ L(less256bytesin2alinged):
cmp $32, %rdx
jb L(less32bytesin128in2aligned)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -523,81 +766,145 @@ L(less32bytesin128in2aligned):
L(256bytesormorein2aligned):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqa 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqa 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqa 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqa 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqa 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqa 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqa 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqa 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -648,18 +955,34 @@ L(512bytesormorein2aligned):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loopin2aligned):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined):
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c
new file mode 100644
index 0000000..7076d4a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-c.c
@@ -0,0 +1,70 @@
+/* C version of memcpy for use when Intel MPX is enabled,
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+
+# if defined SHARED && !defined NOT_IN_libc && !defined IA32
+# include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+# else
+weak_alias (__memcpy, memcpy)
+# endif
+
+weak_alias (__memcpy, mpx_memcpy_nochk)
+#endif
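The C implementation above copies pointer-sized words whenever source and destination are equally aligned, presumably so that, when this file is compiled with MPX instrumentation, each void * store also carries the bounds associated with the stored pointer, which the SSSE3 assembly versions (and a plain byte loop) do not preserve. A minimal sketch of the case this is meant to handle, assuming a toolchain built with -fcheck-pointer-bounds -mmpx; the program is illustrative and not part of the patch:

/* Copying a buffer that itself contains pointers.  With MPX
   instrumentation, the word-wise loop in __memcpy above lets the
   bounds of table[0] survive the copy, so the dereference after
   the copy is still checked against the bounds of 'data'.  */
#include <string.h>

static int data[4];

int
main (void)
{
  int *table[1] = { data };	/* a buffer of pointers */
  int *copy[1];

  memcpy (copy, table, sizeof table);
  return copy[0][3];		/* checked against data's bounds */
}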
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
new file mode 100644
index 0000000..e0c179a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
@@ -0,0 +1,4 @@
+#ifdef __CHKP__
+# define MEMCPY mpx_memcpy_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef..5731b9d 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -27,7 +27,11 @@
#include "asm-syntax.h"
#ifndef MEMCPY
-# define MEMCPY __memcpy_ssse3_back
+# ifdef __CHKP__
+# define MEMCPY mpx_memcpy_nobnd
+# else
+# define MEMCPY __memcpy_ssse3_back
+# endif
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
@@ -48,7 +52,7 @@
ud2
.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && defined MEMCPY_CHK
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -56,6 +60,15 @@ END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
+#ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
+
mov %rdi, %rax
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
@@ -87,6 +100,15 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
+#ifdef __CHKP__
+L(NoEntryCheck):
+ mov %rdi, %rax
+# ifdef USE_AS_MEMPCPY
+ add %rdx, %rax
+# endif
+ ret
+#endif
+
ALIGN (4)
L(144bytesormore):
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031..34987b8 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <shlib-compat.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need memcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
@@ -43,37 +44,39 @@ ENTRY(__new_memcpy)
3: ret
END(__new_memcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __memcpy_sse2, @function; \
.globl __memcpy_sse2; \
.hidden __memcpy_sse2; \
.p2align 4; \
__memcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __memcpy_chk_sse2, @function; \
.globl __memcpy_chk_sse2; \
.p2align 4; \
__memcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-# undef libc_hidden_builtin_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
+# endif
+
+# include "../memcpy.S"
-#include "../memcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
new file mode 100644
index 0000000..3bca281
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/memcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index ad01d8c..5b03f20 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch memcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__memcpy_chk)
leaq __memcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__memcpy_chk)
-# else
-# include "../memcpy_chk.S"
+# else
+# include "../memcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c
new file mode 100644
index 0000000..63d779e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-c.c
@@ -0,0 +1,108 @@
+/* C version of memmove for use when Intel MPX is enabled,
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ /* forward copying */
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ /* forward copying */
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __libc_memmove)
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
+
+# if defined SHARED && !defined NOT_IN_libc
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
+# endif
+
+weak_alias (__memmove, mpx_memmove_nochk)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
new file mode 100644
index 0000000..45a8209
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
@@ -0,0 +1,5 @@
+#ifdef __CHKP__
+# define USE_AS_MEMMOVE
+# define MEMCPY mpx_memmove_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
index f9a4e9a..53e90e7 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
@@ -1,4 +1,10 @@
#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_ssse3_back
+#ifdef __CHKP__
+/* Version of memmove that does not copy bounds even if there are
+   pointers in the source buffer. */
+# define MEMCPY mpx_memmove_nobnd
+#else
+# define MEMCPY __memmove_ssse3_back
+#endif
#define MEMCPY_CHK __memmove_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8149c48..0d2c6f0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -17,31 +17,32 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifndef NOT_IN_libc
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
+#ifndef __CHKP__
+# ifndef NOT_IN_libc
+# define MEMMOVE __memmove_sse2
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
__hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
+# endif
/* Redefine memmove so that the compiler won't complain about the type
mismatch with the IFUNC selector in strong_alias, below. */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# undef memmove
+# undef memmove
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-#endif
+# endif
-#include "string/memmove.c"
+# include "string/memmove.c"
-#ifndef NOT_IN_libc
-# include <shlib-compat.h>
-# include "init-arch.h"
+# ifndef NOT_IN_libc
+# include <shlib-compat.h>
+# include "init-arch.h"
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
@@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove,
strong_alias (__libc_memmove, memmove)
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c
new file mode 100644
index 0000000..bbf53d0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 17ed460..c1b0b93 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -17,19 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <string.h>
-#include "init-arch.h"
+#ifndef __CHKP__
+# include <string.h>
+# include "init-arch.h"
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define MEMMOVE_CHK __memmove_chk_sse2
extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-#include "debug/memmove_chk.c"
+# include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
HAS_SSSE3
? (HAS_FAST_COPY_BACKWARD
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c
new file mode 100644
index 0000000..b9fcb11
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-c.c
@@ -0,0 +1,64 @@
+/* C version of mempcpy for use when Intel MPX is enabled,
+   in order to process an array of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst + n;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __GI___mempcpy)
+weak_alias (mempcpy, __mempcpy)
+weak_alias (mempcpy, mpx_mempcpy_nochk)
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
new file mode 100644
index 0000000..8fa99b5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
@@ -0,0 +1,6 @@
+#ifdef __CHKP__
+# define USE_AS_MEMPCPY
+/* The version of mempcpy without any checks or bounds copying. */
+# define MEMCPY mpx_mempcpy_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
index 82ffacb..2aa5313 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
@@ -1,4 +1,12 @@
#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3_back
-#define MEMCPY_CHK __mempcpy_chk_ssse3_back
+
+#ifdef __CHKP__
+/* Version of mempcpy that does not copy bounds even if there are
+   pointers in the source buffer. */
+# define MEMCPY mpx_mempcpy_nobnd
+#else
+# define MEMCPY __mempcpy_ssse3_back
+#endif
+
+#define MEMCPY_CHK __mempcpy_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index b8b7fcd..b4bfbdc 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -18,13 +18,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need mempcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
@@ -40,38 +41,40 @@ ENTRY(__mempcpy)
2: ret
END(__mempcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __mempcpy_sse2, @function; \
.p2align 4; \
.globl __mempcpy_sse2; \
.hidden __mempcpy_sse2; \
__mempcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __mempcpy_chk_sse2, @function; \
.globl __mempcpy_chk_sse2; \
.p2align 4; \
__mempcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_def(name) \
+# define libc_hidden_def(name) \
.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
+# endif
+
+# include "../mempcpy.S"
-#include "../mempcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
new file mode 100644
index 0000000..40ae725
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/mempcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 3801db3..10653c5 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch mempcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk)
leaq __mempcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__mempcpy_chk)
-# else
-# include "../mempcpy_chk.S"
+# else
+# include "../mempcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3..a3535ad 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
# define STRCAT __strcat_sse2_unaligned
# endif
+# ifdef __CHKP__
+# define RETURN \
+ bndcu -1(%rdi, %rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
# define USE_AS_STRCAT
.text
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
/* Inline corresponding strlen file, temporary until new strcpy
implementation gets merged. */
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %rax, %rax
mov %edi, %ecx
and $0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $80, %rax
pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm1
add $16, %rax
pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm2
add $16, %rax
pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm3
add $16, %rax
pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
jnz L(exit)
add $16, %rax
+
.p2align 4
L(align64_loop):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movaps (%rax), %xmm4
pminub 16(%rax), %xmm4
movaps 32(%rax), %xmm5
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index f170238..4311e86 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -91,6 +91,10 @@ __strchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
pxor %xmm2, %xmm2
movd %esi, %xmm1
movl %edi, %ecx
@@ -124,6 +128,9 @@ __strchr_sse42:
ja L(return_null)
L(unaligned_match):
addq %rdi, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
.p2align 4
@@ -135,15 +142,27 @@ L(unaligned_no_match):
L(loop):
addq $16, %r8
L(aligned_start):
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
jmp L(loop)
@@ -159,6 +178,9 @@ L(return_null):
.p2align 4
L(loop_exit):
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
cfi_endproc
.size __strchr_sse42, .-__strchr_sse42
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c84f1c2..edfa915 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -127,6 +127,14 @@ STRCMP_SSE42:
je LABEL(Byte0)
mov %rdx, %r11
#endif
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -210,6 +218,10 @@ LABEL(touppermask):
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte
@@ -231,6 +243,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -280,6 +297,10 @@ LABEL(ashr_0):
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
.p2align 4
LABEL(ashr_0_use):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -295,6 +316,10 @@ LABEL(ashr_0_use):
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use):
jbe LABEL(strcmp_exitz)
#endif
lea -16(%rdx, %rcx), %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movzbl (%rdi, %rcx), %eax
movzbl (%rsi, %rcx), %edx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -362,6 +391,15 @@ LABEL(ashr_1):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_1_use)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_1_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_1_use):
@@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $14, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+#else
ja LABEL(nibble_ashr_1_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -450,6 +492,15 @@ LABEL(ashr_2):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_2_use)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_2_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_2_use):
@@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $13, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+#else
ja LABEL(nibble_ashr_2_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -539,6 +594,15 @@ LABEL(ashr_3):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_3_use)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_3_restart_use)
+#endif
LABEL(loop_ashr_3_use):
add $16, %r10
@@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $12, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+#else
ja LABEL(nibble_ashr_3_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -627,6 +695,15 @@ LABEL(ashr_4):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_4_use)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_4_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_4_use):
@@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $11, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+#else
ja LABEL(nibble_ashr_4_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -716,6 +797,15 @@ LABEL(ashr_5):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_5_use)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_5_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_5_use):
@@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $10, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+#else
ja LABEL(nibble_ashr_5_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -806,6 +900,15 @@ LABEL(ashr_6):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_6_use)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_6_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_6_use):
@@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $9, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+#else
ja LABEL(nibble_ashr_6_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -895,6 +1002,15 @@ LABEL(ashr_7):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_7_use)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_7_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_7_use):
@@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $8, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+#else
ja LABEL(nibble_ashr_7_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -984,6 +1104,15 @@ LABEL(ashr_8):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_8_use)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_8_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_8_use):
@@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $7, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+#else
ja LABEL(nibble_ashr_8_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1073,6 +1206,15 @@ LABEL(ashr_9):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_9_use)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_9_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_9_use):
@@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $6, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+#else
ja LABEL(nibble_ashr_9_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1163,6 +1309,15 @@ LABEL(ashr_10):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_10_use)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_10_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_10_use):
@@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $5, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+#else
ja LABEL(nibble_ashr_10_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1252,6 +1411,15 @@ LABEL(ashr_11):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_11_use)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_11_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_11_use):
@@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $4, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+#else
ja LABEL(nibble_ashr_11_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1341,6 +1513,15 @@ LABEL(ashr_12):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_12_use)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_12_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_12_use):
@@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $3, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+#else
ja LABEL(nibble_ashr_12_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1431,6 +1616,15 @@ LABEL(ashr_13):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_13_use)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_13_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_13_use):
@@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $2, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+#else
ja LABEL(nibble_ashr_13_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1521,6 +1719,15 @@ LABEL(ashr_14):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_14_use)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_14_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_14_use):
@@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $1, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+#else
ja LABEL(nibble_ashr_14_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1613,6 +1824,15 @@ LABEL(ashr_15):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_15_use)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_15_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_15_use):
@@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $0, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+#else
ja LABEL(nibble_ashr_15_restart_use)
+#endif
LABEL(nibble_ashr_exit_use):
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1691,6 +1915,11 @@ LABEL(exit_use):
test %r8d, %r8d
jz LABEL(ret_use)
xchg %eax, %edx
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(ret_use):
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
@@ -1707,6 +1936,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -1717,6 +1951,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
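
A note on the repeated ashr_N blocks above: on entry each loop checks both strings 16 bytes back from the current offset, and the page-crossing path (the nibble_ashr_N labels) now branches to a new LABEL(ashr_N_check) stub that re-validates both pointers at the current offset before jumping back into the loop. A condensed sketch of that control flow, with the per-N labels abbreviated and registers as in this file:

#ifdef __CHKP__
        bndcu   -16(%rdi, %rdx), %bnd0  /* current block of both strings is in bounds */
        bndcu   -16(%rsi, %rdx), %bnd1
        jmp     LABEL(loop)             /* enter the loop, skipping the stub below */
LABEL(check):                           /* reached only near a 4K page boundary */
        bndcu   (%rdi, %rdx), %bnd0     /* re-check before reading into the next page */
        bndcu   (%rsi, %rdx), %bnd1
        jmp     LABEL(restart)
#endif

This appears intended to keep the checks off the hot path: the extra bndcu pair is only paid at the page crossings the original code already treats specially.
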
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 7710173..e6baee9 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
- jmp *%rcx
+ jmp *%rcx
# ifndef USE_AS_STRCAT
@@ -51,6 +51,16 @@ ENTRY (STRCPY)
# endif
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+ bndcu -1(%rdi, %rdx), %bnd0
+# endif
+# endif
+
and $63, %rcx
cmp $32, %rcx
jbe L(SourceStringAlignmentLess32)
@@ -79,6 +89,9 @@ ENTRY (STRCPY)
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+# endif
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
@@ -91,6 +104,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm1, (%rdi)
/* If source address alignment != destination address alignment */
@@ -101,6 +117,10 @@ L(Unalign16Both):
add %rcx, %r8
# endif
mov $16, %rcx
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqa (%rsi, %rcx), %xmm1
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
@@ -118,6 +138,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -134,6 +158,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm4
movdqu %xmm3, (%rdi, %rcx)
pcmpeqb %xmm4, %xmm0
@@ -150,6 +178,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm1
movdqu %xmm4, (%rdi, %rcx)
pcmpeqb %xmm1, %xmm0
@@ -166,6 +198,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
@@ -182,6 +218,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -198,6 +238,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqu %xmm3, (%rdi, %rcx)
mov %rsi, %rdx
lea 16(%rsi, %rcx), %rsi
@@ -208,6 +252,9 @@ L(Unalign16Both):
lea 128(%r8, %rdx), %r8
# endif
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%rsi), %bnd1
+# endif
movaps (%rsi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rsi), %xmm5
@@ -229,6 +276,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %rdi
add $64, %rsi
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+ bndcu (%rdi), %bnd0
+# endif
movdqu %xmm4, -64(%rdi)
movaps (%rsi), %xmm2
movdqa %xmm2, %xmm4
@@ -271,16 +322,28 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%rdi, %rdx), %bnd0
+# endif
lea 48(%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 63(%rdi), %bnd0
+# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
+# ifdef __CHKP__
+ bndcu 49(%rdi, %rdx), %bnd0
+# endif
lea 49(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
# else
@@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+ bndcu 15(%rdi), %bnd0
+# endif
pcmpeqb %xmm2, %xmm0
movdqu %xmm1, (%rdi)
pmovmskb %xmm0, %rdx
@@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
@@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0):
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
@@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16):
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
@@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
mov %dh, (%rdi)
# ifdef USE_AS_STPCPY
lea (%rdi), %rax
@@ -543,6 +622,9 @@ L(Exit1):
.p2align 4
L(Exit2):
mov (%rsi), %dx
+# ifdef __CHKP__
+ bndcu 1(%rdi), %bnd0
+# endif
mov %dx, (%rdi)
# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
@@ -557,6 +639,9 @@ L(Exit2):
.p2align 4
L(Exit3):
mov (%rsi), %cx
+# ifdef __CHKP__
+ bndcu 2(%rdi), %bnd0
+# endif
mov %cx, (%rdi)
mov %dh, 2(%rdi)
# ifdef USE_AS_STPCPY
@@ -572,6 +657,9 @@ L(Exit3):
.p2align 4
L(Exit4):
mov (%rsi), %edx
+# ifdef __CHKP__
+ bndcu 3(%rdi), %bnd0
+# endif
mov %edx, (%rdi)
# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
@@ -586,6 +674,9 @@ L(Exit4):
.p2align 4
L(Exit5):
mov (%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+# endif
mov %dh, 4(%rdi)
mov %ecx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -602,6 +693,9 @@ L(Exit5):
L(Exit6):
mov (%rsi), %ecx
mov 4(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 5(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
# ifdef USE_AS_STPCPY
@@ -618,6 +712,9 @@ L(Exit6):
L(Exit7):
mov (%rsi), %ecx
mov 3(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 6(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
# ifdef USE_AS_STPCPY
@@ -633,6 +730,9 @@ L(Exit7):
.p2align 4
L(Exit8):
mov (%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 7(%rdi), %bnd0
+# endif
mov %rdx, (%rdi)
# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
@@ -647,6 +747,9 @@ L(Exit8):
.p2align 4
L(Exit9):
mov (%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+# endif
mov %dh, 8(%rdi)
mov %rcx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -663,6 +766,9 @@ L(Exit9):
L(Exit10):
mov (%rsi), %rcx
mov 8(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 9(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -679,6 +785,9 @@ L(Exit10):
L(Exit11):
mov (%rsi), %rcx
mov 7(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 10(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -695,6 +804,9 @@ L(Exit11):
L(Exit12):
mov (%rsi), %rcx
mov 8(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 11(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -711,6 +823,9 @@ L(Exit12):
L(Exit13):
mov (%rsi), %rcx
mov 5(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
@@ -727,6 +842,9 @@ L(Exit13):
L(Exit14):
mov (%rsi), %rcx
mov 6(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 13(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
@@ -743,6 +861,9 @@ L(Exit14):
L(Exit15):
mov (%rsi), %rcx
mov 7(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 14(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -758,6 +879,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
@@ -772,6 +896,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %dh, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -788,6 +915,9 @@ L(Exit17):
L(Exit18):
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 17(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -804,6 +934,9 @@ L(Exit18):
L(Exit19):
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -820,6 +953,9 @@ L(Exit19):
L(Exit20):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -836,6 +972,9 @@ L(Exit20):
L(Exit21):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dh, 20(%rdi)
@@ -853,6 +992,9 @@ L(Exit21):
L(Exit22):
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 21(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -869,6 +1011,9 @@ L(Exit22):
L(Exit23):
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 22(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -885,6 +1030,9 @@ L(Exit23):
L(Exit24):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 23(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -901,6 +1049,9 @@ L(Exit24):
L(Exit25):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
mov %dh, 24(%rdi)
@@ -919,6 +1070,9 @@ L(Exit26):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 25(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
@@ -937,6 +1091,9 @@ L(Exit27):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
@@ -955,6 +1112,9 @@ L(Exit28):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
@@ -972,6 +1132,9 @@ L(Exit28):
L(Exit29):
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
@@ -988,6 +1151,9 @@ L(Exit29):
L(Exit30):
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -1004,6 +1170,9 @@ L(Exit30):
L(Exit31):
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -1020,6 +1189,9 @@ L(Exit31):
L(Exit32):
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
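
A note on the store-side checks added throughout this file: before each store the destination is checked at the offset of the last byte that will be written, e.g. bndcu 15(%rdi), %bnd0 ahead of a 16-byte movdqu, so the whole store is known to fit under the upper bound rather than just its first byte. Minimal sketch of the idiom (offsets illustrative):

# ifdef __CHKP__
        bndcu   15(%rdi), %bnd0         /* highest byte the store below will touch */
# endif
        movdqu  %xmm0, (%rdi)           /* write bytes 0..15 of the destination */
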
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 9c0dcf0..dfdde27 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -20,6 +20,8 @@
#include <nmmintrin.h>
#include <string.h>
#include "varshift.h"
+#ifdef __CHKP__
+#endif
/* We use 0x2:
_SIDD_SBYTE_OPS
@@ -84,6 +86,12 @@ STRCSPN_SSE42 (const char *s, const char *a)
if (*a == 0)
RETURN (NULL, strlen (s));
+#ifdef __CHKP__
+/* TODO: Implement MPX support for these vectorized versions manually using MPX intrinsics */
+ a = __bnd_init_ptr_bounds(a);
+ s = __bnd_init_ptr_bounds(s);
+#endif
+
const char *aligned;
__m128i mask;
int offset = (int) ((size_t) a & 15);
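
The __bnd_init_ptr_bounds calls above reset the bounds attached to s and a to the widest possible range, so the SSE4.2 code that follows (which loads 16 bytes at a time and may read past the terminating NUL) does not trigger spurious #BR faults while manual checks remain unimplemented, as the TODO says. A rough sketch of what one checked load could look like once that TODO is addressed -- the __builtin___bnd_chk_ptr_ubounds name is an assumption about the MPX compiler builtins, not something this commit provides:

#ifdef __CHKP__
/* Hypothetical helper: check the start of a 16-byte chunk before loading it,
   mirroring the bndcu-before-load idiom used in the assembly routines.
   Assumes the file's existing #include <nmmintrin.h> for _mm_loadu_si128.  */
static inline __m128i
load16_checked (const char *p)
{
  __builtin___bnd_chk_ptr_ubounds (p);           /* #BR if p is above its upper bound */
  return _mm_loadu_si128 ((const __m128i *) p);  /* then perform the unaligned load */
}
#endif
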
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 3f92a41..1fed105 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -97,6 +97,10 @@ __strrchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %eax,%eax /* RAX has the last occurrence of s. */
movd %esi, %xmm1
punpcklbw %xmm1, %xmm1
@@ -135,6 +139,9 @@ L(unaligned_no_byte):
contain the NULL terminator. */
jg L(exit)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
/* Loop start on aligned string. */
.p2align 4
@@ -142,6 +149,9 @@ L(loop):
pcmpistri $0x4a, (%r8), %xmm1
jbe L(match_or_eos)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(match_or_eos):
@@ -149,11 +159,17 @@ L(match_or_eos):
L(match_no_eos):
leaq (%r8,%rcx), %rax
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(had_eos):
jnc L(exit)
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
.p2align 4
L(exit):
ret
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 8128cb9..ecc3a3a 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -18,6 +18,8 @@
<http://www.gnu.org/licenses/>. */
#include <nmmintrin.h>
+#ifdef __CHKP__
+#endif
#include <string.h>
#include "varshift.h"
@@ -62,6 +64,12 @@ __strspn_sse42 (const char *s, const char *a)
if (*a == 0)
return 0;
+#ifdef __CHKP__
+/* TODO: Implement Intel MPX manual checks for these vectorized versions using new intrinsics */
+ s = __bnd_init_ptr_bounds(s);
+ a = __bnd_init_ptr_bounds(a);
+#endif
+
const char *aligned;
__m128i mask;
int offset = (int) ((size_t) a & 15);
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index cd63b68..577744b 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -165,8 +165,14 @@ char *
__attribute__ ((section (".text.sse4.2")))
STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
{
-#define p1 s1
+#ifdef __CHKP__
+/* TODO: Implement Intel MPX manual checks for these vectorized versions using new intrinsics */
+ unsigned char *p1 = __bnd_init_ptr_bounds(s1);
+ unsigned char *p2 = __bnd_init_ptr_bounds(s2);
+#else
+# define p1 s1
const unsigned char *p2 = s2;
+#endif
#ifndef STRCASESTR_NONASCII
if (__builtin_expect (p2[0] == '\0', 0))
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index b7de092..77889dd 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3)
mov %rsi, %rcx
mov %rdi, %rdx
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
cmpl $0, (%rcx)
jz L(Exit4)
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
cmpl $0, 4(%rcx)
jz L(Exit8)
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
cmpl $0, 8(%rcx)
jz L(Exit12)
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
cmpl $0, 12(%rcx)
jz L(Exit16)
@@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
mov (%rcx), %r9
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %r9, (%rdx)
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+# endif
pcmpeqd (%rsi), %xmm0
mov 8(%rcx), %r9
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %r9, 8(%rdx)
pmovmskb %xmm0, %rax
@@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 16(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
@@ -82,6 +109,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -91,6 +122,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqd %xmm4, %xmm0
@@ -100,6 +135,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqd %xmm1, %xmm0
@@ -109,6 +148,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqd %xmm2, %xmm0
@@ -118,6 +161,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -127,6 +174,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
@@ -138,6 +189,10 @@ L(Align16Both):
.p2align 4
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 63(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
@@ -168,6 +223,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -49(%rdx), %bnd0
+# endif
movaps %xmm4, -64(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
@@ -176,11 +234,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -33(%rdx), %bnd0
+# endif
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%rdx), %bnd0
+# endif
movaps %xmm6, -32(%rdx)
pcmpeqd %xmm7, %xmm0
@@ -190,11 +254,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %rsi
+# ifdef __CHKP__
+ bndcu -1(%rdx), %bnd0
+# endif
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
L(Shl4Start):
@@ -206,6 +276,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -219,6 +293,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -232,6 +310,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -244,6 +326,9 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -258,6 +343,9 @@ L(Shl4Start):
.p2align 4
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -279,6 +367,9 @@ L(Shl4LoopStart):
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -287,6 +378,10 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
+# ifdef __CHKP__
+ bndcu -4(%rcx), %bnd1
+ bndcu 11(%rdx), %bnd0
+# endif
movdqu -4(%rcx), %xmm1
mov $12, %rsi
movdqu %xmm1, -4(%rdx)
@@ -294,6 +389,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
L(Shl8Start):
@@ -305,6 +403,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -318,6 +420,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -331,6 +437,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -343,6 +453,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -357,6 +471,9 @@ L(Shl8Start):
.p2align 4
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -378,6 +495,9 @@ L(Shl8LoopStart):
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -386,6 +506,10 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 7(%rdx), %bnd0
+# endif
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
@@ -393,6 +517,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
L(Shl12Start):
@@ -404,6 +531,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -417,6 +548,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -430,6 +565,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -442,6 +581,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -456,6 +599,9 @@ L(Shl12Start):
.p2align 4
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -476,6 +622,9 @@ L(Shl12LoopStart):
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -484,6 +633,10 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 3(%rdx), %bnd0
+# endif
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
@@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes):
jnz L(Exit4)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -510,6 +666,9 @@ L(ExitHigh):
jnz L(Exit12)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
@@ -519,6 +678,9 @@ L(ExitHigh):
.p2align 4
L(Exit4):
movl (%rcx), %eax
+# ifdef __CHKP__
+ bndcu 3(%rdx), %bnd0
+# endif
movl %eax, (%rdx)
mov %rdi, %rax
ret
@@ -526,6 +688,9 @@ L(Exit4):
.p2align 4
L(Exit8):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -533,6 +698,9 @@ L(Exit8):
.p2align 4
L(Exit12):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 11(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
@@ -542,6 +710,9 @@ L(Exit12):
.p2align 4
L(Exit16):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f4d5591..2f4cb25 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -20,11 +20,23 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
.text
ENTRY (rawmemchr)
movd %rsi, %xmm1
mov %rdi, %rcx
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -63,7 +75,7 @@ L(crosscache):
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -71,24 +83,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -99,24 +123,36 @@ L(loop_prolog):
test $0x3f, %rdi
jz L(align64_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -129,6 +165,9 @@ L(loop_prolog):
.p2align 4
L(align64_loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -170,36 +209,36 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
xor %rax, %rax
- ret
+ RETURN
END (rawmemchr)
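
The RETURN macro introduced at the top of this file routes every successful exit of rawmemchr through a check of the value being returned: because rawmemchr takes no length argument, the match pointer in %rax is validated against the caller's upper bound right before ret, so a search that ran beyond the object faults instead of handing back an out-of-bounds pointer. Expanded at one match site it amounts to (illustrative):

L(matches16):
        bsf     %eax, %eax
        lea     16(%rax, %rdi), %rax    /* %rax = address of the matching byte */
#ifdef __CHKP__
        bndcu   (%rax), %bnd0           /* RETURN: #BR if the match lies above the bound */
#endif
        ret
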
diff --git a/sysdeps/x86_64/stpcpy_chk-c.c b/sysdeps/x86_64/stpcpy_chk-c.c
new file mode 100644
index 0000000..5de29f9
--- /dev/null
+++ b/sysdeps/x86_64/stpcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/stpcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S
index 905e8d7..d4a2764 100644
--- a/sysdeps/x86_64/stpcpy_chk.S
+++ b/sysdeps/x86_64/stpcpy_chk.S
@@ -1,3 +1,5 @@
-#define USE_AS_STPCPY_CHK
-#define STRCPY_CHK __stpcpy_chk
-#include <sysdeps/x86_64/strcpy_chk.S>
+#ifndef __CHKP__
+# define USE_AS_STPCPY_CHK
+# define STRCPY_CHK __stpcpy_chk
+# include <sysdeps/x86_64/strcpy_chk.S>
+#endif
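
The two hunks above work as a pair: with __CHKP__ defined the hand-written __stpcpy_chk assembly is compiled out, and the new stpcpy_chk-c.c pulls in the generic C implementation from debug/, which the MPX-aware compiler can instrument itself. The same swap is applied to strcpy_chk later in this patch. Side by side, the pattern is simply:

/* stpcpy_chk-c.c: built only when MPX checks are on */
#ifdef __CHKP__
# include <debug/stpcpy_chk.c>
#endif

/* stpcpy_chk.S: the assembly version drops out in that case */
#ifndef __CHKP__
# define USE_AS_STPCPY_CHK
# define STRCPY_CHK __stpcpy_chk
# include <sysdeps/x86_64/strcpy_chk.S>
#endif
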
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index 8bea6fb..7832379 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -25,6 +25,11 @@
.text
ENTRY (strcat)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+
movq %rdi, %rcx /* Dest. register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rax /* Duplicate destination pointer. */
@@ -36,7 +41,11 @@ ENTRY (strcat)
neg %ecx /* We need to align to 8 bytes. */
addl $8,%ecx
/* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
+0:
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
+ cmpb $0x0,(%rax) /* is byte NUL? */
je 2f /* yes => start copy */
incq %rax /* increment pointer */
decl %ecx
@@ -48,6 +57,9 @@ ENTRY (strcat)
.p2align 4
4:
/* First unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -62,6 +74,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Second unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -76,6 +91,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Third unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -90,6 +108,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Fourth unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -163,6 +184,9 @@ ENTRY (strcat)
.p2align 4
22:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -177,10 +201,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -195,10 +225,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -213,10 +249,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -231,6 +273,9 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 22b /* Next iteration. */
@@ -239,10 +284,16 @@ ENTRY (strcat)
The loop is unrolled twice. */
.p2align 4
23:
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 24f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 24f /* yes, finish. */
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index d89f1eb..8519a81 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -29,6 +33,9 @@ ENTRY (strchr)
pxor %xmm2, %xmm2
punpcklbw %xmm1, %xmm1
orl $0xffffffff, %esi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pshufd $0, %xmm1, %xmm1
subq %rdi, %rcx
@@ -44,7 +51,11 @@ ENTRY (strchr)
orl %edx, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index d8c345b..3e4abfa 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -23,6 +23,10 @@
.text
ENTRY (__strchrnul)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -44,7 +48,11 @@ ENTRY (__strchrnul)
andl %esi, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -56,6 +64,9 @@ ENTRY (__strchrnul)
1: bsfl %ecx, %edx
leaq -16(%rdi,%rdx), %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
END (__strchrnul)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 7680937..ece49c9 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -128,7 +128,16 @@ libc_hidden_def (__strncasecmp)
ENTRY (STRCMP)
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
-L(oop): movb (%rdi), %al
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+L(oop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
+ movb (%rdi), %al
cmpb (%rsi), %al
jne L(neq)
incq %rdi
@@ -177,6 +186,12 @@ END (STRCMP)
je LABEL(Byte0)
mov %rdx, %r11
# endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -243,6 +258,10 @@ END (STRCMP)
# endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte alignment.
@@ -263,6 +282,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
lea 15(%rax), %r9
sub %rcx, %r9
@@ -310,6 +334,10 @@ LABEL(ashr_0):
*/
.p2align 4
LABEL(loop_ashr_0):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -326,6 +354,10 @@ LABEL(loop_ashr_0):
jbe LABEL(strcmp_exitz)
# endif
add $16, %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -377,6 +409,15 @@ LABEL(ashr_1):
lea 1(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_1)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_1)
+# endif
.p2align 4
LABEL(loop_ashr_1):
@@ -460,7 +501,11 @@ LABEL(nibble_ashr_1):
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
+# ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+# else
jmp LABEL(gobble_ashr_1)
+# endif
/*
* Once find null char, determine if there is a string mismatch
@@ -507,6 +552,15 @@ LABEL(ashr_2):
lea 2(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_2)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(loop_ashr_2):
@@ -588,7 +642,11 @@ LABEL(nibble_ashr_2):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+# else
jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(ashr_2_exittail):
@@ -632,6 +690,15 @@ LABEL(ashr_3):
lea 3(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_3)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(loop_ashr_3):
@@ -713,7 +780,11 @@ LABEL(nibble_ashr_3):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+# else
jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(ashr_3_exittail):
@@ -757,6 +828,15 @@ LABEL(ashr_4):
lea 4(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_4)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(loop_ashr_4):
@@ -838,7 +918,11 @@ LABEL(nibble_ashr_4):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+# else
jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(ashr_4_exittail):
@@ -882,6 +966,15 @@ LABEL(ashr_5):
lea 5(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_5)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(loop_ashr_5):
@@ -963,7 +1056,11 @@ LABEL(nibble_ashr_5):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+# else
jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(ashr_5_exittail):
@@ -1007,6 +1104,15 @@ LABEL(ashr_6):
lea 6(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_6)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(loop_ashr_6):
@@ -1088,7 +1194,11 @@ LABEL(nibble_ashr_6):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+# else
jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(ashr_6_exittail):
@@ -1132,6 +1242,15 @@ LABEL(ashr_7):
lea 7(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_7)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(loop_ashr_7):
@@ -1213,7 +1332,11 @@ LABEL(nibble_ashr_7):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+# else
jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(ashr_7_exittail):
@@ -1257,6 +1380,15 @@ LABEL(ashr_8):
lea 8(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_8)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(loop_ashr_8):
@@ -1338,7 +1470,11 @@ LABEL(nibble_ashr_8):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+# else
jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(ashr_8_exittail):
@@ -1382,6 +1518,15 @@ LABEL(ashr_9):
lea 9(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_9)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(loop_ashr_9):
@@ -1463,7 +1608,11 @@ LABEL(nibble_ashr_9):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+# else
jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(ashr_9_exittail):
@@ -1507,6 +1656,15 @@ LABEL(ashr_10):
lea 10(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_10)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(loop_ashr_10):
@@ -1588,7 +1746,11 @@ LABEL(nibble_ashr_10):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+# else
jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(ashr_10_exittail):
@@ -1632,6 +1794,15 @@ LABEL(ashr_11):
lea 11(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_11)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(loop_ashr_11):
@@ -1713,7 +1884,11 @@ LABEL(nibble_ashr_11):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+# else
jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(ashr_11_exittail):
@@ -1757,6 +1932,15 @@ LABEL(ashr_12):
lea 12(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_12)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(loop_ashr_12):
@@ -1838,7 +2022,11 @@ LABEL(nibble_ashr_12):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+# else
jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(ashr_12_exittail):
@@ -1882,6 +2070,15 @@ LABEL(ashr_13):
lea 13(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_13)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(loop_ashr_13):
@@ -1963,7 +2160,11 @@ LABEL(nibble_ashr_13):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+# else
jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(ashr_13_exittail):
@@ -2007,6 +2208,15 @@ LABEL(ashr_14):
lea 14(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_14)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(loop_ashr_14):
@@ -2088,7 +2298,11 @@ LABEL(nibble_ashr_14):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+# else
jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(ashr_14_exittail):
@@ -2134,6 +2348,15 @@ LABEL(ashr_15):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_15)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(loop_ashr_15):
@@ -2215,7 +2438,11 @@ LABEL(nibble_ashr_15):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+# else
jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(ashr_15_exittail):
@@ -2240,6 +2467,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -2250,6 +2482,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
# endif
+/*#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif*/
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
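
Unlike the live checks in strcmp-sse42.S earlier in this patch, the bndcu pair for the less16bytes tail here is committed commented out rather than enabled, presumably pending a fix to bound-check support in this strcmp variant. If turned on, it would mirror the sse42 version:

#ifdef __CHKP__
        bndcu   (%rdi, %rdx), %bnd0     /* byte about to be read from each string */
        bndcu   (%rsi, %rdx), %bnd1
#endif
        movzbl  (%rsi, %rdx), %ecx
        movzbl  (%rdi, %rdx), %eax
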
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 6128247..2b78e95 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -26,6 +26,10 @@
.text
ENTRY (STRCPY)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
movq %rsi, %rcx /* Source register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rdx /* Duplicate destination pointer. */
@@ -36,8 +40,14 @@ ENTRY (STRCPY)
addl $8,%ecx
/* Search the first bytes directly. */
0:
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movb (%rsi), %al /* Fetch a byte */
testb %al, %al /* Is it NUL? */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* Store it */
jz 4f /* If it was NUL, done! */
incq %rsi
@@ -54,6 +64,9 @@ ENTRY (STRCPY)
.p2align 4
1:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -68,10 +81,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -86,10 +105,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -104,10 +129,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -122,6 +153,9 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 1b /* Next iteration. */
@@ -132,10 +166,16 @@ ENTRY (STRCPY)
3:
/* Note that stpcpy needs to return with the value of the NUL
byte. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 4f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 4f /* yes, finish. */
diff --git a/sysdeps/x86_64/strcpy_chk-c.c b/sysdeps/x86_64/strcpy_chk-c.c
new file mode 100644
index 0000000..4deabcc
--- /dev/null
+++ b/sysdeps/x86_64/strcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/strcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
index 7e171de..4b79124 100644
--- a/sysdeps/x86_64/strcpy_chk.S
+++ b/sysdeps/x86_64/strcpy_chk.S
@@ -18,6 +18,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef __CHKP__
#include <sysdep.h>
#include "asm-syntax.h"
@@ -206,3 +207,4 @@ ENTRY (STRCPY_CHK)
jmp HIDDEN_JUMPTARGET (__chk_fail)
END (STRCPY_CHK)
+#endif
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index 65f8a9e..0acca21 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -29,6 +29,12 @@
.text
ENTRY (strcspn)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
movq %rdi, %rdx /* Save SRC. */
@@ -54,21 +60,34 @@ ENTRY (strcspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from skipset */
+L(2):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd1
+# endif
+ movb (%rax), %cl /* get byte from skipset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+# endif
movb 1(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+# endif
movb 2(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+# endif
movb 3(%rax), %cl /* get byte from skipset */
addq $4, %rax /* increment skipset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
@@ -89,18 +108,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movb (%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(4) /* yes => return */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+# endif
movb 1(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(5) /* yes => return */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+# endif
movb 2(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* yes => return */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+# endif
movb 3(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jne L(3) /* no => start loop again */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index eeb1092..065f0e6 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -63,6 +63,10 @@ L(n_nonzero):
mov %rsi, %r11
#endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
pxor %xmm8, %xmm8
pxor %xmm9, %xmm9
pxor %xmm10, %xmm10
@@ -157,6 +161,9 @@ L(loop_init):
L(loop):
addq $64, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
cmpq %rax, %r10
je L(exit_end)
@@ -182,6 +189,9 @@ L(first):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
.p2align 4
@@ -192,6 +202,9 @@ L(exit):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#else
@@ -199,6 +212,9 @@ L(exit):
/* Main loop. Unrolled twice to improve L2 cache performance on core2. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
movdqa 64(%rax), %xmm8
pminub 80(%rax), %xmm8
@@ -231,6 +247,9 @@ L(exit0):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#endif
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e413b07..0bd3405 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strrchr)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -46,7 +50,11 @@ ENTRY (strrchr)
orl %ecx, %esi
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -73,6 +81,9 @@ ENTRY (strrchr)
bsrl %edx, %edx
jz 4f
leaq -16(%rdi,%rdx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
4: ret
END (strrchr)
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 2911da2..bd3be8a 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -25,6 +25,12 @@
.text
ENTRY (strspn)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
movq %rdi, %rdx /* Save SRC. */
@@ -50,21 +56,34 @@ ENTRY (strspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from stopset */
+L(2):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd1
+#endif
+ movb (%rax), %cl /* get byte from stopset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+#endif
movb 1(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+#endif
movb 2(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+#endif
movb 3(%rax), %cl /* get byte from stopset */
addq $4, %rax /* increment stopset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
@@ -85,18 +104,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movb (%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(4) /* no => return */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+#endif
movb 1(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(5) /* no => return */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+#endif
movb 2(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* no => return */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+#endif
movb 3(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jnz L(3) /* yes => start loop again */
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index 5636d9a..17e2521 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -90,6 +90,9 @@ ENTRY (FUNCTION)
the last run. */
cmpq $0, %rdx
cmove %rax, %rdx
+#ifdef __CHKP__
+ bndldx (,%rax,1),%bnd0
+#endif
testq %rdx, %rdx
jz L(returnNULL)
movq %rsi, %rax /* Get start of delimiter set. */
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 3f098dc..3ab1e47 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -22,6 +22,11 @@
.text
ENTRY (wcschr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
+
movd %rsi, %xmm1
pxor %xmm2, %xmm2
mov %rdi, %rcx
@@ -43,6 +48,9 @@ ENTRY (wcschr)
and $-16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -78,6 +86,9 @@ L(cross_cache):
L(unaligned_match):
add %rdi, %rax
add %rcx, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
@@ -91,6 +102,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -100,6 +114,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -109,6 +126,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -118,6 +138,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -142,6 +165,9 @@ L(matches):
L(match):
sub $16, %rdi
add %rdi, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index d6b516b..38e2849 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -28,6 +28,14 @@ ENTRY (wcscmp)
*/
mov %esi, %eax
mov %edi, %edx
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
mov %al, %ch
mov %dl, %cl
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 5927352..a7d944f 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -21,20 +21,45 @@
.text
ENTRY (__wcslen)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmpl $0, (%rdi)
jz L(exit_tail0)
+#ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+#endif
cmpl $0, 4(%rdi)
jz L(exit_tail1)
+#ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+#endif
cmpl $0, 8(%rdi)
jz L(exit_tail2)
+#ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+#endif
cmpl $0, 12(%rdi)
jz L(exit_tail3)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
cmpl $0, 16(%rdi)
jz L(exit_tail4)
+#ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+#endif
cmpl $0, 20(%rdi)
jz L(exit_tail5)
+#ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+#endif
cmpl $0, 24(%rdi)
jz L(exit_tail6)
+#ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+#endif
cmpl $0, 28(%rdi)
jz L(exit_tail7)
@@ -44,6 +69,9 @@ ENTRY (__wcslen)
lea 16(%rdi), %rcx
and $-16, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -51,6 +79,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -58,6 +89,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -65,54 +99,81 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -123,6 +184,9 @@ ENTRY (__wcslen)
.p2align 4
L(aligned_64_loop):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movaps (%rax), %xmm0
movaps 16(%rax), %xmm1
movaps 32(%rax), %xmm2
@@ -173,6 +237,9 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
@@ -181,11 +248,17 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
L(exit_1):
add $1, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index ea1e2e5..8edfc46 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -19,9 +19,22 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
+
.text
ENTRY (wcsrchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %rsi, %xmm1
mov %rdi, %rcx
punpckldq %xmm1, %xmm1
@@ -92,6 +105,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -101,6 +117,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %rdi
@@ -110,6 +129,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %rdi
@@ -119,6 +141,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %rdi
@@ -145,7 +170,7 @@ L(return_value):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match):
@@ -175,14 +200,14 @@ L(find_zero):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_first_wchar):
test $1, %rax
jz L(return_value)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_second_wchar):
@@ -192,7 +217,7 @@ L(find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_third_wchar):
@@ -204,12 +229,12 @@ L(find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero):
add %rcx, %rdi
- mov %rdx, %rcx
+ mov %rdx, %rcx
L(prolog_find_zero_1):
test $15, %cl
jnz L(prolog_find_zero_in_first_wchar)
@@ -228,14 +253,14 @@ L(prolog_find_zero_1):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_first_wchar):
test $1, %rax
jz L(return_null)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_second_wchar):
@@ -245,7 +270,7 @@ L(prolog_find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_third_wchar):
@@ -257,22 +282,22 @@ L(prolog_find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_second_wchar):
lea -12(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_third_wchar):
lea -8(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2c6f2eca5187fbd39bdbe267ce0d2c81fe0de696
commit 2c6f2eca5187fbd39bdbe267ce0d2c81fe0de696
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri May 24 13:18:17 2013 +0400
Implemented bounds check support for string/memory routines for x86_32.
Warning: Not complete and not yet tested.
diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
index 7d45862..af2602e 100644
--- a/sysdeps/i386/i486/strcat.S
+++ b/sysdeps/i386/i486/strcat.S
@@ -35,9 +35,19 @@ ENTRY (strcat)
movl DEST(%esp), %edx
movl SRC(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndldx SRC(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+#endif
testb $0xff, (%ecx) /* Is source string empty? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+#endif
/* Test the first bytes separately until destination is aligned. */
testl $3, %edx /* destination pointer aligned? */
@@ -66,7 +76,11 @@ ENTRY (strcat)
L(4): addl $16,%edx /* increment destination pointer for round */
-L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+#endif
+ movl (%edx), %eax /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
/* If you compare this with the algorithm in memchr.S you will
@@ -98,6 +112,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
/* If at least one byte of the word is C we don't get 0 in %ecx. */
jnz L(3)
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+#endif
movl 4(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -110,6 +127,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(5) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+#endif
movl 8(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -122,6 +142,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+#endif
movl 12(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -155,6 +178,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
/* Now we have to align the source pointer. */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -163,6 +190,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -171,6 +202,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -182,10 +217,18 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
ALIGN(4)
-L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
+L(28):
+#ifdef __CHKP__
+ bndcu 12(%ecx, %edx), %bnd0
+#endif
+ movl %eax, 12(%ecx,%edx)/* store word at destination */
addl $16, %ecx /* adjust pointer for full round */
-L(29): movl (%ecx), %eax /* get word from source */
+L(29):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+#endif
+ movl (%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -196,8 +239,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(9) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
movl %eax, (%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+#endif
movl 4(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -209,8 +258,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(91) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 4(%ecx, %edx), %bnd0
+#endif
movl %eax, 4(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+#endif
movl 8(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -222,8 +277,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(92) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%ecx, %edx), %bnd0
+#endif
movl %eax, 8(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+#endif
movl 12(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -240,15 +301,25 @@ L(93): addl $4, %ecx /* adjust pointer */
L(92): addl $4, %ecx
L(91): addl $4, %ecx
-L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
+L(9):
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
+ movb %al, (%ecx,%edx) /* store first byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcu 1(%ecx, %edx), %bnd0
+#endif
movb %ah, 1(%ecx,%edx) /* store second byte of last word */
orb %ah, %ah /* is it NUL? */
jz L(8) /* yes => return */
shrl $16, %eax /* make upper bytes accessible */
+#ifdef __CHKP__
+ bndcu 2(%ecx, %edx), %bnd0
+#endif
movb %al, 2(%ecx,%edx) /* store third byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
diff --git a/sysdeps/i386/i586/strchr.S b/sysdeps/i386/i586/strchr.S
index 648d528..4efa935 100644
--- a/sysdeps/i386/i586/strchr.S
+++ b/sysdeps/i386/i586/strchr.S
@@ -54,6 +54,10 @@ ENTRY (strchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
movl %eax, %edi /* duplicate string pointer for later */
cfi_rel_offset (edi, 12)
@@ -83,6 +87,9 @@ ENTRY (strchr)
xorb %dl, %cl /* load single byte and test for NUL */
je L(3) /* yes => return NULL */
+#ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+#endif
movb 1(%eax), %cl /* load single byte */
incl %eax
@@ -97,7 +104,11 @@ ENTRY (strchr)
jne L(11)
-L(0): movb (%eax), %cl /* load single byte */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movb (%eax), %cl /* load single byte */
cmpb %cl, %dl /* is byte == C? */
je L(out) /* aligned => return pointer */
@@ -115,7 +126,11 @@ L(0): movb (%eax), %cl /* load single byte */
four instruction up to `L1' will not be executed in the loop
because the same code is found at the end of the loop, but
there it is executed in parallel with other instructions. */
-L(11): movl (%eax), %ecx
+L(11):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx
movl $magic, %ebp
movl $magic, %edi
@@ -159,6 +174,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi /* load magic value */
xorl %edx, %ebx /* clear words which are C */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi /* (word+magic) */
@@ -189,6 +207,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -219,6 +240,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -249,6 +273,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index c940369..6392a8e 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -45,6 +45,10 @@ ENTRY (STRCPY)
cfi_rel_offset (edi, 8)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edi,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+#endif
xorl %eax, %eax
leal -1(%esi), %ecx
@@ -61,6 +65,9 @@ ENTRY (STRCPY)
/* 0xb is the distance between 2: and 1: but we avoid writing
1f-2b because the assembler generates worse code. */
leal 0xb(%edx,%ecx,8), %ecx
+# ifdef __CHKP__
+ jmp L(1)
+# endif
#else
leal 1f(,%ecx,8), %ecx
#endif
diff --git a/sysdeps/i386/i586/strlen.S b/sysdeps/i386/i586/strlen.S
index b50fffa..9034625 100644
--- a/sysdeps/i386/i586/strlen.S
+++ b/sysdeps/i386/i586/strlen.S
@@ -41,6 +41,10 @@
ENTRY (strlen)
movl STR(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcu (%eax),%bnd0
+#endif
movl $3, %edx /* load mask (= 3) */
andl %eax, %edx /* separate last two bits of address */
@@ -48,10 +52,16 @@ ENTRY (strlen)
jz L(1) /* aligned => start loop */
jp L(0) /* exactly two bits set */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
@@ -61,7 +71,11 @@ ENTRY (strlen)
jz L(1)
-L(0): cmpb %dh, (%eax) /* is byte NUL? */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
@@ -174,7 +188,11 @@ L(3): subl $4, %eax /* correct too early pointer increment */
incl %eax /* increment pointer */
-L(2): subl STR(%esp), %eax /* now compute the length as difference
+L(2):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ subl STR(%esp), %eax /* now compute the length as difference
between start and terminating NUL
character */
ret
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
index b8091a6..6cb03e7 100644
--- a/sysdeps/i386/i686/memcmp.S
+++ b/sysdeps/i386/i686/memcmp.S
@@ -48,9 +48,19 @@ ENTRY (memcmp)
movl BLK1(%esp), %eax
movl BLK2(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+#endif
cmpl $1, %ecx
jne L(not_1)
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
movzbl (%eax), %ecx /* LEN == 1 */
cmpb (%edx), %cl
jne L(neq)
@@ -69,6 +79,12 @@ L(neq):
cfi_rel_offset (ebx, 0)
L(not_1):
jl L(bye) /* LEN == 0 */
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
pushl %esi
cfi_adjust_cfa_offset (4)
@@ -84,36 +100,64 @@ L(not_1):
ALIGN (4)
L(28bytes):
+#ifdef __CHKP__
+ bndcu -28(%esi), %bnd0
+ bndcu -28(%edx), %bnd1
+#endif
movl -28(%esi), %eax
movl -28(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(24bytes):
+#ifdef __CHKP__
+ bndcu -24(%esi), %bnd0
+ bndcu -24(%edx), %bnd1
+#endif
movl -24(%esi), %eax
movl -24(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(20bytes):
+#ifdef __CHKP__
+ bndcu -20(%esi), %bnd0
+ bndcu -20(%edx), %bnd1
+#endif
movl -20(%esi), %eax
movl -20(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(16bytes):
+#ifdef __CHKP__
+ bndcu -16(%esi), %bnd0
+ bndcu -16(%edx), %bnd1
+#endif
movl -16(%esi), %eax
movl -16(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(12bytes):
+#ifdef __CHKP__
+ bndcu -12(%esi), %bnd0
+ bndcu -12(%edx), %bnd1
+#endif
movl -12(%esi), %eax
movl -12(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(8bytes):
+#ifdef __CHKP__
+ bndcu -8(%esi), %bnd0
+ bndcu -8(%edx), %bnd1
+#endif
movl -8(%esi), %eax
movl -8(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(4bytes):
+#ifdef __CHKP__
+ bndcu -4(%esi), %bnd0
+ bndcu -4(%edx), %bnd1
+#endif
movl -4(%esi), %eax
movl -4(%edx), %ecx
cmpl %ecx, %eax
@@ -129,41 +173,73 @@ L(0bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(29bytes):
+#ifdef __CHKP__
+ bndcu -29(%esi), %bnd0
+ bndcu -29(%edx), %bnd1
+#endif
movl -29(%esi), %eax
movl -29(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(25bytes):
+#ifdef __CHKP__
+ bndcu -25(%esi), %bnd0
+ bndcu -25(%edx), %bnd1
+#endif
movl -25(%esi), %eax
movl -25(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(21bytes):
+#ifdef __CHKP__
+ bndcu -21(%esi), %bnd0
+ bndcu -21(%edx), %bnd1
+#endif
movl -21(%esi), %eax
movl -21(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(17bytes):
+#ifdef __CHKP__
+ bndcu -17(%esi), %bnd0
+ bndcu -17(%edx), %bnd1
+#endif
movl -17(%esi), %eax
movl -17(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(13bytes):
+#ifdef __CHKP__
+ bndcu -13(%esi), %bnd0
+ bndcu -13(%edx), %bnd1
+#endif
movl -13(%esi), %eax
movl -13(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(9bytes):
+#ifdef __CHKP__
+ bndcu -9(%esi), %bnd0
+ bndcu -9(%edx), %bnd1
+#endif
movl -9(%esi), %eax
movl -9(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(5bytes):
+#ifdef __CHKP__
+ bndcu -5(%esi), %bnd0
+ bndcu -5(%edx), %bnd1
+#endif
movl -5(%esi), %eax
movl -5(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(1bytes):
+#ifdef __CHKP__
+ bndcu -1(%esi), %bnd0
+ bndcu -1(%edx), %bnd1
+#endif
movzbl -1(%esi), %eax
cmpb -1(%edx), %al
jne L(set)
@@ -177,41 +253,73 @@ L(1bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(30bytes):
+#ifdef __CHKP__
+ bndcu -30(%esi), %bnd0
+ bndcu -30(%edx), %bnd1
+#endif
movl -30(%esi), %eax
movl -30(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(26bytes):
+#ifdef __CHKP__
+ bndcu -26(%esi), %bnd0
+ bndcu -26(%edx), %bnd1
+#endif
movl -26(%esi), %eax
movl -26(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(22bytes):
+#ifdef __CHKP__
+ bndcu -22(%esi), %bnd0
+ bndcu -22(%edx), %bnd1
+#endif
movl -22(%esi), %eax
movl -22(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(18bytes):
+#ifdef __CHKP__
+ bndcu -18(%esi), %bnd0
+ bndcu -18(%edx), %bnd1
+#endif
movl -18(%esi), %eax
movl -18(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(14bytes):
+#ifdef __CHKP__
+ bndcu -14(%esi), %bnd0
+ bndcu -14(%edx), %bnd1
+#endif
movl -14(%esi), %eax
movl -14(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(10bytes):
+#ifdef __CHKP__
+ bndcu -10(%esi), %bnd0
+ bndcu -10(%edx), %bnd1
+#endif
movl -10(%esi), %eax
movl -10(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(6bytes):
+#ifdef __CHKP__
+ bndcu -6(%esi), %bnd0
+ bndcu -6(%edx), %bnd1
+#endif
movl -6(%esi), %eax
movl -6(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(2bytes):
+#ifdef __CHKP__
+ bndcu -2(%esi), %bnd0
+ bndcu -2(%edx), %bnd1
+#endif
movzwl -2(%esi), %eax
movzwl -2(%edx), %ecx
cmpb %cl, %al
@@ -228,41 +336,73 @@ L(2bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(31bytes):
+#ifdef __CHKP__
+ bndcu -31(%esi), %bnd0
+ bndcu -31(%edx), %bnd1
+#endif
movl -31(%esi), %eax
movl -31(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(27bytes):
+#ifdef __CHKP__
+ bndcu -27(%esi), %bnd0
+ bndcu -27(%edx), %bnd1
+#endif
movl -27(%esi), %eax
movl -27(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(23bytes):
+#ifdef __CHKP__
+ bndcu -23(%esi), %bnd0
+ bndcu -23(%edx), %bnd1
+#endif
movl -23(%esi), %eax
movl -23(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(19bytes):
+#ifdef __CHKP__
+ bndcu -19(%esi), %bnd0
+ bndcu -19(%edx), %bnd1
+#endif
movl -19(%esi), %eax
movl -19(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(15bytes):
+#ifdef __CHKP__
+ bndcu -15(%esi), %bnd0
+ bndcu -15(%edx), %bnd1
+#endif
movl -15(%esi), %eax
movl -15(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(11bytes):
+#ifdef __CHKP__
+ bndcu -11(%esi), %bnd0
+ bndcu -11(%edx), %bnd1
+#endif
movl -11(%esi), %eax
movl -11(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(7bytes):
+#ifdef __CHKP__
+ bndcu -7(%esi), %bnd0
+ bndcu -7(%edx), %bnd1
+#endif
movl -7(%esi), %eax
movl -7(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(3bytes):
+#ifdef __CHKP__
+ bndcu -3(%esi), %bnd0
+ bndcu -3(%edx), %bnd1
+#endif
movzwl -3(%esi), %eax
movzwl -3(%edx), %ecx
cmpb %cl, %al
@@ -286,34 +426,66 @@ L(3bytes):
L(32bytesormore):
subl $32, %ecx
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edx), %bnd1
+#endif
movl (%esi), %eax
cmpl (%edx), %eax
jne L(load_ecx)
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+ bndcu 4(%edx), %bnd1
+#endif
movl 4(%esi), %eax
cmpl 4(%edx), %eax
jne L(load_ecx_4)
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+ bndcu 8(%edx), %bnd1
+#endif
movl 8(%esi), %eax
cmpl 8(%edx), %eax
jne L(load_ecx_8)
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+ bndcu 12(%edx), %bnd1
+#endif
movl 12(%esi), %eax
cmpl 12(%edx), %eax
jne L(load_ecx_12)
+#ifdef __CHKP__
+ bndcu 16(%esi), %bnd0
+ bndcu 16(%edx), %bnd1
+#endif
movl 16(%esi), %eax
cmpl 16(%edx), %eax
jne L(load_ecx_16)
+#ifdef __CHKP__
+ bndcu 20(%esi), %bnd0
+ bndcu 20(%edx), %bnd1
+#endif
movl 20(%esi), %eax
cmpl 20(%edx), %eax
jne L(load_ecx_20)
+#ifdef __CHKP__
+ bndcu 24(%esi), %bnd0
+ bndcu 24(%edx), %bnd1
+#endif
movl 24(%esi), %eax
cmpl 24(%edx), %eax
jne L(load_ecx_24)
+#ifdef __CHKP__
+ bndcu 28(%esi), %bnd0
+ bndcu 28(%edx), %bnd1
+#endif
movl 28(%esi), %eax
cmpl 28(%edx), %eax
jne L(load_ecx_28)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index aed79a8..3fd4370 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -50,6 +50,11 @@ ENTRY (memset)
cfi_adjust_cfa_offset (4)
movl DEST(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
#if BZERO_P
xorl %eax, %eax /* fill with 0 */
#else
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 8946bfa..7a4999a 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -6,9 +6,7 @@ endif
ifeq ($(subdir),string)
gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+sysdep_routines += bzero-sse2 memset-sse2 \
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
@@ -23,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
strnlen-sse2 strnlen-c \
strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
strncase_l-c strncase-c strncase_l-ssse3 \
- strcasecmp_l-sse4 strncase_l-sse4
+ strcasecmp_l-sse4 strncase_l-sse4 mpx_memcpy_nobnd \
+ mpx_mempcpy_nobnd mpx_memmove_nobnd
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/i386/i686/multiarch/Versions
index 59b185a..7f0cbbc 100644
--- a/sysdeps/i386/i686/multiarch/Versions
+++ b/sysdeps/i386/i686/multiarch/Versions
@@ -2,4 +2,11 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.14 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ }
+%endif
}
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/__bcopy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/bcopy.S
rename to sysdeps/i386/i686/multiarch/__bcopy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/__memcpy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memcpy.S
rename to sysdeps/i386/i686/multiarch/__memcpy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memcpy_chk.S
rename to sysdeps/i386/i686/multiarch/__memcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/__memmove.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memmove.S
rename to sysdeps/i386/i686/multiarch/__memmove.S
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/__memmove_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memmove_chk.S
rename to sysdeps/i386/i686/multiarch/__memmove_chk.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/__mempcpy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/mempcpy.S
rename to sysdeps/i386/i686/multiarch/__mempcpy.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/mempcpy_chk.S
rename to sysdeps/i386/i686/multiarch/__mempcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/bcopy.c b/sysdeps/i386/i686/multiarch/bcopy.c
new file mode 100644
index 0000000..6f5efba
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy.c
@@ -0,0 +1,7 @@
+#include <stddef.h>
+
+void
+bcopy (const void *src, void *dst, size_t n)
+{
+ memmove (dst, src, n);
+}
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index 2c282bd..63f0704 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -37,11 +37,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
/* Support sysdeps/i386/i686/multiarch/bcopy.S. */
- IFUNC_IMPL (i, name, bcopy,
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
- __bcopy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
- IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
+// IFUNC_IMPL (i, name, bcopy,
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
+// __bcopy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+// IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
/* Support sysdeps/i386/i686/multiarch/bzero.S. */
IFUNC_IMPL (i, name, bzero,
@@ -64,21 +64,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
- IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_ia32))
+// IFUNC_IMPL (i, name, __memmove_chk,
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+// __memmove_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove.S. */
- IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
+// IFUNC_IMPL (i, name, memmove,
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3)
+// IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
/* Support sysdeps/i386/i686/multiarch/memrchr.S. */
IFUNC_IMPL (i, name, memrchr,
@@ -274,37 +274,37 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
- IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __memcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+// __memcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memcpy.S. */
- IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
- __memcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
+// IFUNC_IMPL (i, name, memcpy,
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+// __memcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
- IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __mempcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+// __mempcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
- IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
+// IFUNC_IMPL (i, name, mempcpy,
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strlen.S. */
IFUNC_IMPL (i, name, strlen,
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
index d364177..80be0d9 100644
--- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
@@ -58,6 +58,12 @@ ENTRY (MEMCHR)
# endif
mov %ecx, %eax
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -79,9 +85,18 @@ ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR
sub %ecx, %edx
jbe L(return_null_1)
-# endif
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ ret
+# else
+ add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
+# endif
.p2align 4
L(unaligned_no_match_1):
@@ -163,8 +178,15 @@ L(loop_prolog):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
# endif
pcmpeqb %xmm1, %xmm0
@@ -173,8 +195,15 @@ L(loop_prolog):
jnz L(matches)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqa 16(%edi), %xmm2
# else
+
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
movdqa 16(%edx), %xmm2
# endif
pcmpeqb %xmm1, %xmm2
@@ -183,8 +212,15 @@ L(loop_prolog):
jnz L(matches16)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 32(%edi), %bnd0
+# endif
movdqa 32(%edi), %xmm3
# else
+
+# ifdef __CHKP__
+ bndcu 32(%edx), %bnd0
+# endif
movdqa 32(%edx), %xmm3
# endif
pcmpeqb %xmm1, %xmm3
@@ -193,8 +229,15 @@ L(loop_prolog):
jnz L(matches32)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 48(%edi), %bnd0
+# endif
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu 48(%edx), %bnd0
+# endif
movdqa 48(%edx), %xmm4
# endif
pcmpeqb %xmm1, %xmm4
@@ -277,11 +320,18 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
movdqa 16(%edx), %xmm2
movdqa 32(%edx), %xmm3
@@ -342,9 +392,15 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
lea 48(%edi, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 48(%edx, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -404,9 +460,15 @@ L(matches0):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea -16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea -16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -415,9 +477,15 @@ L(matches):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
add %edx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -426,9 +494,15 @@ L(matches16):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -437,9 +511,15 @@ L(matches32):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 32(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 32(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
index 2984a37..3ccfe66 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -91,6 +91,15 @@ ENTRY (MEMCMP)
jbe L(less1bytes)
# endif
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
+
pxor %xmm0, %xmm0
cmp $64, %ecx
ja L(64bytesormore)
@@ -115,6 +124,10 @@ L(less8bytes):
cmpb (%edx), %bl
jne L(nonzero)
+# ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+ bndcu 1(%edx), %bnd1
+# endif
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
@@ -122,6 +135,10 @@ L(less8bytes):
cmp $2, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 2(%eax), %bnd0
+ bndcu 2(%edx), %bnd1
+# endif
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
@@ -129,6 +146,10 @@ L(less8bytes):
cmp $3, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 3(%eax), %bnd0
+ bndcu 3(%edx), %bnd1
+# endif
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
@@ -136,6 +157,10 @@ L(less8bytes):
cmp $4, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+ bndcu 4(%edx), %bnd1
+# endif
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
@@ -143,6 +168,10 @@ L(less8bytes):
cmp $5, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 5(%eax), %bnd0
+ bndcu 5(%edx), %bnd1
+# endif
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
@@ -150,6 +179,10 @@ L(less8bytes):
cmp $6, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 6(%eax), %bnd0
+ bndcu 6(%edx), %bnd1
+# endif
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
@@ -198,6 +231,14 @@ L(return0):
.p2align 4
L(less1bytes):
jb L(0bytesend)
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
movzbl (%eax), %eax
movzbl (%edx), %edx
sub %edx, %eax
@@ -221,18 +262,30 @@ L(64bytesormore_loop):
ptest %xmm2, %xmm0
jnc L(find_16diff)
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+ bndcu 16(%edx), %bnd1
+# endif
movdqu 16(%eax), %xmm1
movdqu 16(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_32diff)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+ bndcu 32(%edx), %bnd1
+# endif
movdqu 32(%eax), %xmm1
movdqu 32(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_48diff)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+ bndcu 48(%edx), %bnd1
+# endif
movdqu 48(%eax), %xmm1
movdqu 48(%edx), %xmm2
pxor %xmm1, %xmm2
diff --git a/sysdeps/i386/i686/multiarch/memcpy.c b/sysdeps/i386/i686/multiarch/memcpy.c
new file mode 100644
index 0000000..824cdcb
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy.c
@@ -0,0 +1,40 @@
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+weak_alias (__memcpy, memcpy)
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c
new file mode 100644
index 0000000..1eee86c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memmove.c b/sysdeps/i386/i686/multiarch/memmove.c
new file mode 100644
index 0000000..9e5ad6d
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove.c
@@ -0,0 +1,76 @@
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ // backward copying
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ // forward copying
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+ // backward copying
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ // forward copying
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c
new file mode 100644
index 0000000..bbf53d0
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.c b/sysdeps/i386/i686/multiarch/mempcpy.c
new file mode 100644
index 0000000..6cbdad1
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy.c
@@ -0,0 +1,40 @@
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst + n;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __mempcpy)
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
new file mode 100644
index 0000000..ba17078
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
index c5c3e97..75c947c 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
@@ -45,6 +45,12 @@ ENTRY (MEMCHR)
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu -1(%ecx, %edx), %bnd0
+# endif
+
sub $16, %edx
jbe L(length_less16)
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index bcea296..ce112b1 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -90,6 +90,7 @@ ENTRY (__memset_sse2_rep)
ENTRANCE
movl LEN(%esp), %ecx
+
#ifdef USE_AS_BZERO
xor %eax, %eax
#else
@@ -101,6 +102,11 @@ ENTRY (__memset_sse2_rep)
or %edx, %eax
#endif
movl DEST(%esp), %edx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
cmp $32, %ecx
jae L(32bytesormore)
diff --git a/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
new file mode 100644
index 0000000..b7f4e0e
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
@@ -0,0 +1,1803 @@
+/* memcpy with SSSE3 and REP string.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY mpx_memcpy_nobnd
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register that contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
+ addl $(TABLE - .), %ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+#else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. INDEX is a register that contains the index into the
+ jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+#ifdef __CHKP__
+ bndldx SRC(%esp,%eax,1), %bnd1
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu -1(%eax, %ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+#endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
+L(fwd_write_less32bytes):
+#ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+#endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+#ifndef USE_AS_MEMMOVE
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+#endif
+
+ ALIGN (4)
+/* ECX >= 48 and EDX is 4 byte aligned. */
+L(48bytesormore):
+ movdqu (%eax), %xmm0
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ PUSH (%esi)
+ cfi_remember_state
+ add $16, %edx
+ movl %edi, %esi
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+#ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+#endif
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ ALIGN (4)
+L(shl_0):
+ movdqu %xmm0, (%esi)
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+L(shl_0_gobble):
+
+#ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
+# else
+ mov __x86_data_cache_size_half, %edi
+# endif
+#endif
+ mov %edi, %esi
+ shr $3, %esi
+ sub %esi, %edi
+ cmp %edi, %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
+ ALIGN (4)
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_0_gobble_mem_start):
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+ sub $128, %ecx
+L(shl_0_gobble_mem_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ prefetchnta 0x1c0(%edx)
+ prefetchnta 0x280(%edx)
+
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_1):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $1, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_1_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_1_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_1_loop)
+
+L(shl_1_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_2):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $2, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_2_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_2_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_2_loop)
+
+L(shl_2_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_3):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $3, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_3_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_3_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_3_loop)
+
+L(shl_3_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_4):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $4, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_4_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_4_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_4_loop)
+
+L(shl_4_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_5):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $5, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_5_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_5_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_5_loop)
+
+L(shl_5_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_6):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $6, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_6_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_6_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_6_loop)
+
+L(shl_6_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_7):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $7, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_7_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_7_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_7_loop)
+
+L(shl_7_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_8):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $8, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_8_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_8_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_8_loop)
+
+L(shl_8_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_9):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $9, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_9_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_9_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_9_loop)
+
+L(shl_9_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_10):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $10, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_10_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_10_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_10_loop)
+
+L(shl_10_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_11):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $11, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_11_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_11_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_11_loop)
+
+L(shl_11_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_12):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $12, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_12_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_12_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_12_loop)
+
+L(shl_12_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_13):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $13, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_13_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_13_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_13_loop)
+
+L(shl_13_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_14):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $14, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_14_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_14_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_14_loop)
+
+L(shl_14_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_15):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $15, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_15_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_15_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_15_loop)
+
+L(shl_15_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
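The sixteen L(shl_N) loops above handle a source that sits N bytes past a 16-byte boundary: each iteration does two aligned 16-byte loads and merges them with palignr so every store stays aligned. A scalar, little-endian C sketch of that realignment trick on 8-byte words; the names, the word size and the test data are illustrative, not the committed code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy nwords aligned 8-byte words whose payload starts `shift' bytes
   (1..7) past src_aligned: combine two aligned loads per store, the
   scalar analogue of the movdqa/palignr/movdqa sequence above.  */
static void
copy_shifted (uint64_t *dst, const uint64_t *src_aligned,
              unsigned shift, size_t nwords)
{
  uint64_t prev = src_aligned[0];
  for (size_t i = 0; i < nwords; i++)
    {
      uint64_t next = src_aligned[i + 1];
      dst[i] = (prev >> (8 * shift)) | (next << (8 * (8 - shift)));
      prev = next;
    }
}

int main (void)
{
  uint64_t src[3] = { 0 };
  uint64_t out[2];

  memcpy (src, "...abcdefghijklmnop", 19);   /* payload starts at byte 3 */
  copy_shifted (out, src, 3, 2);
  printf ("%.16s\n", (char *) out);          /* prints abcdefghijklmnop */
  return 0;
}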
+
+
+ ALIGN (4)
+L(fwd_write_44bytes):
+ movl -44(%eax), %ecx
+ movl %ecx, -44(%edx)
+L(fwd_write_40bytes):
+ movl -40(%eax), %ecx
+ movl %ecx, -40(%edx)
+L(fwd_write_36bytes):
+ movl -36(%eax), %ecx
+ movl %ecx, -36(%edx)
+L(fwd_write_32bytes):
+ movl -32(%eax), %ecx
+ movl %ecx, -32(%edx)
+L(fwd_write_28bytes):
+ movl -28(%eax), %ecx
+ movl %ecx, -28(%edx)
+L(fwd_write_24bytes):
+ movl -24(%eax), %ecx
+ movl %ecx, -24(%edx)
+L(fwd_write_20bytes):
+ movl -20(%eax), %ecx
+ movl %ecx, -20(%edx)
+L(fwd_write_16bytes):
+ movl -16(%eax), %ecx
+ movl %ecx, -16(%edx)
+L(fwd_write_12bytes):
+ movl -12(%eax), %ecx
+ movl %ecx, -12(%edx)
+L(fwd_write_8bytes):
+ movl -8(%eax), %ecx
+ movl %ecx, -8(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+L(fwd_write_0bytes):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_45bytes):
+ movl -45(%eax), %ecx
+ movl %ecx, -45(%edx)
+L(fwd_write_41bytes):
+ movl -41(%eax), %ecx
+ movl %ecx, -41(%edx)
+L(fwd_write_37bytes):
+ movl -37(%eax), %ecx
+ movl %ecx, -37(%edx)
+L(fwd_write_33bytes):
+ movl -33(%eax), %ecx
+ movl %ecx, -33(%edx)
+L(fwd_write_29bytes):
+ movl -29(%eax), %ecx
+ movl %ecx, -29(%edx)
+L(fwd_write_25bytes):
+ movl -25(%eax), %ecx
+ movl %ecx, -25(%edx)
+L(fwd_write_21bytes):
+ movl -21(%eax), %ecx
+ movl %ecx, -21(%edx)
+L(fwd_write_17bytes):
+ movl -17(%eax), %ecx
+ movl %ecx, -17(%edx)
+L(fwd_write_13bytes):
+ movl -13(%eax), %ecx
+ movl %ecx, -13(%edx)
+L(fwd_write_9bytes):
+ movl -9(%eax), %ecx
+ movl %ecx, -9(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_46bytes):
+ movl -46(%eax), %ecx
+ movl %ecx, -46(%edx)
+L(fwd_write_42bytes):
+ movl -42(%eax), %ecx
+ movl %ecx, -42(%edx)
+L(fwd_write_38bytes):
+ movl -38(%eax), %ecx
+ movl %ecx, -38(%edx)
+L(fwd_write_34bytes):
+ movl -34(%eax), %ecx
+ movl %ecx, -34(%edx)
+L(fwd_write_30bytes):
+ movl -30(%eax), %ecx
+ movl %ecx, -30(%edx)
+L(fwd_write_26bytes):
+ movl -26(%eax), %ecx
+ movl %ecx, -26(%edx)
+L(fwd_write_22bytes):
+ movl -22(%eax), %ecx
+ movl %ecx, -22(%edx)
+L(fwd_write_18bytes):
+ movl -18(%eax), %ecx
+ movl %ecx, -18(%edx)
+L(fwd_write_14bytes):
+ movl -14(%eax), %ecx
+ movl %ecx, -14(%edx)
+L(fwd_write_10bytes):
+ movl -10(%eax), %ecx
+ movl %ecx, -10(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_47bytes):
+ movl -47(%eax), %ecx
+ movl %ecx, -47(%edx)
+L(fwd_write_43bytes):
+ movl -43(%eax), %ecx
+ movl %ecx, -43(%edx)
+L(fwd_write_39bytes):
+ movl -39(%eax), %ecx
+ movl %ecx, -39(%edx)
+L(fwd_write_35bytes):
+ movl -35(%eax), %ecx
+ movl %ecx, -35(%edx)
+L(fwd_write_31bytes):
+ movl -31(%eax), %ecx
+ movl %ecx, -31(%edx)
+L(fwd_write_27bytes):
+ movl -27(%eax), %ecx
+ movl %ecx, -27(%edx)
+L(fwd_write_23bytes):
+ movl -23(%eax), %ecx
+ movl %ecx, -23(%edx)
+L(fwd_write_19bytes):
+ movl -19(%eax), %ecx
+ movl %ecx, -19(%edx)
+L(fwd_write_15bytes):
+ movl -15(%eax), %ecx
+ movl %ecx, -15(%edx)
+L(fwd_write_11bytes):
+ movl -11(%eax), %ecx
+ movl %ecx, -11(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN_END
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(large_page):
+ movdqu (%eax), %xmm1
+ movdqu %xmm0, (%esi)
+ movntdq %xmm1, (%edx)
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+L(large_page_loop_init):
+ POP (%esi)
+ sub $0x80, %ecx
+ POP (%edi)
+L(large_page_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ lfence
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
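For copies larger than half the shared cache the code falls into L(large_page), which streams the data with non-temporal movntdq stores (plus prefetchnta) so a huge copy does not flush the caches, and issues sfence before dispatching the tail. A hedged intrinsics sketch of that path; it assumes, as the assembly has arranged by this point, that the destination is 16-byte aligned and the length is a multiple of 16, and the prefetch distance is only indicative.

#include <emmintrin.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Streamed copy: unaligned loads, non-temporal aligned stores, and one
   sfence at the end to order the weakly-ordered streaming stores.  */
static void
copy_nontemporal (char *dst, const char *src, size_t n)
{
  for (size_t i = 0; i < n; i += 16)
    {
      _mm_prefetch (src + i + 0x1c0, _MM_HINT_NTA);
      __m128i v = _mm_loadu_si128 ((const __m128i *) (src + i));
      _mm_stream_si128 ((__m128i *) (dst + i), v);
    }
  _mm_sfence ();
}

int main (void)
{
  enum { N = 1 << 16 };
  char *a = aligned_alloc (16, N);
  char *b = aligned_alloc (16, N);
  if (!a || !b)
    return 1;
  for (size_t i = 0; i < N; i++)
    a[i] = (char) i;
  copy_nontemporal (b, a, N);
  printf ("%d\n", memcmp (a, b, N));   /* prints 0 */
  free (a);
  free (b);
  return 0;
}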
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(copy_page_by_rep):
+ mov %eax, %esi
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep movsl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movzwl (%esi), %eax
+ movw %ax, (%edi)
+ add $2, %esi
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movzbl (%esi), %eax
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%esi)
+ POP (%edi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_44bytes):
+ movl 40(%eax), %ecx
+ movl %ecx, 40(%edx)
+L(bk_write_40bytes):
+ movl 36(%eax), %ecx
+ movl %ecx, 36(%edx)
+L(bk_write_36bytes):
+ movl 32(%eax), %ecx
+ movl %ecx, 32(%edx)
+L(bk_write_32bytes):
+ movl 28(%eax), %ecx
+ movl %ecx, 28(%edx)
+L(bk_write_28bytes):
+ movl 24(%eax), %ecx
+ movl %ecx, 24(%edx)
+L(bk_write_24bytes):
+ movl 20(%eax), %ecx
+ movl %ecx, 20(%edx)
+L(bk_write_20bytes):
+ movl 16(%eax), %ecx
+ movl %ecx, 16(%edx)
+L(bk_write_16bytes):
+ movl 12(%eax), %ecx
+ movl %ecx, 12(%edx)
+L(bk_write_12bytes):
+ movl 8(%eax), %ecx
+ movl %ecx, 8(%edx)
+L(bk_write_8bytes):
+ movl 4(%eax), %ecx
+ movl %ecx, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_45bytes):
+ movl 41(%eax), %ecx
+ movl %ecx, 41(%edx)
+L(bk_write_41bytes):
+ movl 37(%eax), %ecx
+ movl %ecx, 37(%edx)
+L(bk_write_37bytes):
+ movl 33(%eax), %ecx
+ movl %ecx, 33(%edx)
+L(bk_write_33bytes):
+ movl 29(%eax), %ecx
+ movl %ecx, 29(%edx)
+L(bk_write_29bytes):
+ movl 25(%eax), %ecx
+ movl %ecx, 25(%edx)
+L(bk_write_25bytes):
+ movl 21(%eax), %ecx
+ movl %ecx, 21(%edx)
+L(bk_write_21bytes):
+ movl 17(%eax), %ecx
+ movl %ecx, 17(%edx)
+L(bk_write_17bytes):
+ movl 13(%eax), %ecx
+ movl %ecx, 13(%edx)
+L(bk_write_13bytes):
+ movl 9(%eax), %ecx
+ movl %ecx, 9(%edx)
+L(bk_write_9bytes):
+ movl 5(%eax), %ecx
+ movl %ecx, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_46bytes):
+ movl 42(%eax), %ecx
+ movl %ecx, 42(%edx)
+L(bk_write_42bytes):
+ movl 38(%eax), %ecx
+ movl %ecx, 38(%edx)
+L(bk_write_38bytes):
+ movl 34(%eax), %ecx
+ movl %ecx, 34(%edx)
+L(bk_write_34bytes):
+ movl 30(%eax), %ecx
+ movl %ecx, 30(%edx)
+L(bk_write_30bytes):
+ movl 26(%eax), %ecx
+ movl %ecx, 26(%edx)
+L(bk_write_26bytes):
+ movl 22(%eax), %ecx
+ movl %ecx, 22(%edx)
+L(bk_write_22bytes):
+ movl 18(%eax), %ecx
+ movl %ecx, 18(%edx)
+L(bk_write_18bytes):
+ movl 14(%eax), %ecx
+ movl %ecx, 14(%edx)
+L(bk_write_14bytes):
+ movl 10(%eax), %ecx
+ movl %ecx, 10(%edx)
+L(bk_write_10bytes):
+ movl 6(%eax), %ecx
+ movl %ecx, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_47bytes):
+ movl 43(%eax), %ecx
+ movl %ecx, 43(%edx)
+L(bk_write_43bytes):
+ movl 39(%eax), %ecx
+ movl %ecx, 39(%edx)
+L(bk_write_39bytes):
+ movl 35(%eax), %ecx
+ movl %ecx, 35(%edx)
+L(bk_write_35bytes):
+ movl 31(%eax), %ecx
+ movl %ecx, 31(%edx)
+L(bk_write_31bytes):
+ movl 27(%eax), %ecx
+ movl %ecx, 27(%edx)
+L(bk_write_27bytes):
+ movl 23(%eax), %ecx
+ movl %ecx, 23(%edx)
+L(bk_write_23bytes):
+ movl 19(%eax), %ecx
+ movl %ecx, 19(%edx)
+L(bk_write_19bytes):
+ movl 15(%eax), %ecx
+ movl %ecx, 15(%edx)
+L(bk_write_15bytes):
+ movl 11(%eax), %ecx
+ movl %ecx, 11(%edx)
+L(bk_write_11bytes):
+ movl 7(%eax), %ecx
+ movl %ecx, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN_END
+
+
+ .pushsection .rodata.ssse3,"a",@progbits
+ ALIGN (2)
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ ALIGN (2)
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ ALIGN (2)
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
+#ifdef USE_AS_MEMMOVE
+ ALIGN (4)
+L(copy_backward):
+ PUSH (%esi)
+ movl %eax, %esi
+ add %ecx, %edx
+ add %ecx, %esi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movl -4(%esi), %eax
+ movl %eax, -4(%edx)
+ movl -8(%esi), %eax
+ movl %eax, -8(%edx)
+ movl -12(%esi), %eax
+ movl %eax, -12(%edx)
+ movl -16(%esi), %eax
+ movl %eax, -16(%edx)
+ movl -20(%esi), %eax
+ movl %eax, -20(%edx)
+ movl -24(%esi), %eax
+ movl %eax, -24(%edx)
+ movl -28(%esi), %eax
+ movl %eax, -28(%edx)
+ movl -32(%esi), %eax
+ movl %eax, -32(%edx)
+ sub $32, %edx
+ sub $32, %esi
+
+L(bk_write_less32bytes):
+ movl %esi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%esi)
+L(bk_write_less48bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%esi)
+ ALIGN (4)
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+ /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
+ then (EDX & 2) must be != 0. */
+ jz L(bk_got2)
+ sub $1, %esi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%esi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %esi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%esi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ ALIGN (4)
+L(bk_write_more64bytes):
+ /* Check alignment of last byte. */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is 4-byte aligned, but not 16-byte aligned. */
+L(bk_ssse3_align):
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
+L(bk_ssse3_cpy):
+ sub $64, %esi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%esi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%esi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%esi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%esi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+#endif
+
+END (MEMCPY)
diff --git a/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
new file mode 100644
index 0000000..caaa89a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY mpx_memmove_nobnd
+#include "mpx_memcpy_nobnd.S"
diff --git a/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
new file mode 100644
index 0000000..4b0af49
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY mpx_mempcpy_nobnd
+#include "mpx_memcpy_nobnd.S"
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
index 62d60cd..b1d39ae 100644
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S
@@ -95,10 +95,20 @@ ENTRY (STRCAT)
test %ebx, %ebx
jz L(ExitZero)
# endif
+# ifdef __CHKP__
+ bndldx STR1(%esp,%eax,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+# endif
cmpb $0, (%esi)
mov %esi, %ecx
mov %eax, %edx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
and $63, %ecx
and $63, %edx
@@ -113,6 +123,9 @@ ENTRY (STRCAT)
movdqu (%eax), %xmm1
movdqu (%esi), %xmm5
pcmpeqb %xmm1, %xmm0
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %ecx
pcmpeqb %xmm5, %xmm4
@@ -132,6 +145,9 @@ L(alignment_prolog):
and $-16, %eax
pcmpeqb (%eax), %xmm0
movdqu (%esi), %xmm5
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %edx
pcmpeqb %xmm5, %xmm4
@@ -148,21 +164,33 @@ L(loop_prolog):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -212,6 +240,9 @@ L(StartStrcpyPart):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm5, (%eax)
pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
@@ -250,21 +281,33 @@ L(StrlenCore7_1):
.p2align 4
L(align16_loop_1):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16_1)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32_1)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48_1)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -323,6 +366,9 @@ L(StartStrcpyPart_1):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
@@ -341,6 +387,9 @@ L(Unalign16Both):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -352,6 +401,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -364,6 +417,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%eax, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -376,6 +433,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%eax, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -388,6 +449,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -400,6 +465,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -412,6 +481,9 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm3, (%eax, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -421,6 +493,9 @@ L(Unalign16BothBigN):
# ifdef USE_AS_STRNCAT
lea 128(%ebx, %edx), %ebx
# endif
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -443,6 +518,10 @@ L(Unalign16BothBigN):
L(Unaligned64Loop_start):
add $64, %eax
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu -1(%eax), %bnd0
+# endif
movdqu %xmm4, -64(%eax)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -485,11 +564,18 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
movdqu %xmm6, 32(%eax)
add $48, %esi
add $48, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
@@ -501,12 +587,18 @@ L(BigN):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm1, (%eax)
sub %ecx, %eax
sub $48, %ebx
@@ -515,6 +607,9 @@ L(BigN):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -532,12 +627,20 @@ L(CopyFrom1To16Bytes):
add %ecx, %eax
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesTail):
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -546,6 +649,10 @@ L(CopyFrom1To32Bytes1):
add $16, %eax
L(CopyFrom1To16BytesTail1):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -554,34 +661,60 @@ L(CopyFrom1To32Bytes):
add %ecx, %esi
add $16, %edx
sub %ecx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %esi
add $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
add $32, %esi
add $32, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
.p2align 4
L(CopyFrom1To16BytesExit):
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Case2 */
@@ -594,6 +727,10 @@ L(CopyFrom1To16BytesCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -605,6 +742,10 @@ L(CopyFrom1To32BytesCase2):
sub %ecx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTailCase2):
@@ -613,12 +754,20 @@ L(CopyFrom1To16BytesTailCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTail1Case2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Case2 or Case3, Case3 */
@@ -631,6 +780,10 @@ L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %ecx, %eax
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -639,6 +792,10 @@ L(CopyFrom1To32BytesCase2OrCase3):
jnz L(CopyFrom1To32BytesCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -647,6 +804,10 @@ L(CopyFrom1To16BytesTailCase2OrCase3):
jnz L(CopyFrom1To16BytesTailCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -657,6 +818,10 @@ L(CopyFrom1To32Bytes1Case2OrCase3):
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1Case2)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
@@ -1110,15 +1275,27 @@ L(Unaligned64LeaveCase3):
and $-16, %ecx
add $48, %ebx
jl L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 63(%eax), %bnd0
+# endif
movdqu %xmm7, 48(%eax)
xor %bh, %bh
movb %bh, 64(%eax)
@@ -1137,6 +1314,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %ecx
sub $16, %ebx
@@ -1146,6 +1326,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
add $16, %ecx
sub $16, %ebx
@@ -1155,6 +1338,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
lea 16(%eax, %ecx), %eax
lea 16(%esi, %ecx), %esi
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
index 938d74d..1e59581 100644
--- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
@@ -46,6 +46,12 @@ ENTRY (__strchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
@@ -81,6 +87,9 @@ ENTRY (__strchr_sse2_bsf)
L(unaligned_match):
add %edi, %eax
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
.p2align 4
@@ -94,6 +103,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -103,6 +115,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -112,6 +127,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -121,6 +139,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -146,6 +167,9 @@ L(matches):
L(match):
sub $16, %edi
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
/* Return NULL. */
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index 355ed4e..1958b36 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -222,6 +222,12 @@ L(ascii):
test REM, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+#endif
mov %dx, %cx
and $0xfff, %cx
cmp $0xff0, %cx
@@ -280,6 +286,10 @@ L(ascii):
add $16, %edx
add $16, %eax
L(first4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -303,6 +313,10 @@ L(first4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -325,6 +339,10 @@ L(first4bytes):
cmp $2, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -347,6 +365,10 @@ L(first4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -369,6 +391,10 @@ L(first4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -391,6 +417,10 @@ L(first4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -413,6 +443,10 @@ L(first4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -435,6 +469,10 @@ L(first4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
@@ -483,6 +521,10 @@ L(check_offset):
testl %edx, %edx
jg L(crosspage)
L(loop):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movdqu (%esi,%edx), %xmm2
movdqu (%edi,%edx), %xmm1
TOLOWER (%xmm2, %xmm1)
@@ -497,6 +539,10 @@ L(loop):
add $16, %edx
jle L(loop)
L(crosspage):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movzbl (%edi,%edx), %eax
movzbl (%esi,%edx), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -625,6 +671,10 @@ L(less16bytes):
add $8, %eax
L(less4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -647,6 +697,10 @@ L(less4bytes):
cmp $1, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -670,6 +724,10 @@ L(less4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -692,6 +750,10 @@ L(less4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -715,6 +777,10 @@ L(more4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -738,6 +804,10 @@ L(more4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -760,6 +830,10 @@ L(more4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -782,6 +856,10 @@ L(more4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
index d942ac2..4fdf7e0 100644
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
@@ -85,6 +85,14 @@ ENTRY (STRCPY)
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edi,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+ bndcl (%edi), %bnd0
+ bndcu -1(%edi, %ebx), %bnd0
+# endif
mov %esi, %ecx
# ifndef USE_AS_STPCPY
@@ -111,6 +119,9 @@ ENTRY (STRCPY)
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
@@ -124,6 +135,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm1, (%edi)
sub %ecx, %edi
@@ -132,6 +146,10 @@ ENTRY (STRCPY)
.p2align 4
L(Unalign16Both):
mov $16, %ecx
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
@@ -143,6 +161,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -153,6 +175,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%edi, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -163,6 +189,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm4)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%edi, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -173,6 +203,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm1)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -183,6 +217,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -193,6 +231,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqu %xmm3, (%edi, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -202,6 +244,9 @@ L(Unalign16Both):
lea 128(%ebx, %edx), %ebx
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -220,6 +265,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %edi
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu (%edi), %bnd0
+# endif
movdqu %xmm4, -64(%edi)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -259,15 +308,27 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
movdqu %xmm5, 16(%edi)
movdqu %xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%edi, %edx), %bnd0
+# endif
lea 48(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 63(%edi), %bnd0
+# endif
movdqu %xmm7, 48(%edi)
add $15, %ebx
sub %edx, %ebx
+# ifdef __CHKP__
+ bndcu 49(%edi, %edx), %bnd0
+# endif
lea 49(%edi, %edx), %edi
jmp L(StrncpyFillTailWithZero)
@@ -288,6 +349,10 @@ L(SourceStringAlignmentZero):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
movdqu %xmm1, (%edi)
pmovmskb %xmm0, %edx
@@ -313,7 +378,7 @@ L(CopyFrom1To16BytesTail):
bsf %edx, %edx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
- .p2align 4
+ .p2align 9
L(CopyFrom1To32Bytes1):
add $16, %esi
add $16, %edi
@@ -337,6 +402,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
add $63, %ebx
sub %edx, %ebx
@@ -350,6 +418,9 @@ L(CopyFrom1To16BytesUnaligned_16):
# ifdef USE_AS_STPCPY
lea 16(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm5, 16(%edi)
add $47, %ebx
sub %edx, %ebx
@@ -364,6 +435,9 @@ L(CopyFrom1To16BytesUnaligned_32):
# ifdef USE_AS_STPCPY
lea 32(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm6, 32(%edi)
add $31, %ebx
sub %edx, %ebx
@@ -495,6 +569,9 @@ L(Exit1):
.p2align 4
L(Exit2):
movw (%esi), %dx
+# ifdef __CHKP__
+ bndcu 1(%edi), %bnd0
+# endif
movw %dx, (%edi)
# ifdef USE_AS_STPCPY
lea 1(%edi), %eax
@@ -507,6 +584,9 @@ L(Exit2):
.p2align 4
L(Exit3):
movw (%esi), %cx
+# ifdef __CHKP__
+ bndcu 2(%edi), %bnd0
+# endif
movw %cx, (%edi)
movb %dh, 2(%edi)
# ifdef USE_AS_STPCPY
@@ -520,6 +600,9 @@ L(Exit3):
.p2align 4
L(Exit4):
movl (%esi), %edx
+# ifdef __CHKP__
+ bndcu 3(%edi), %bnd0
+# endif
movl %edx, (%edi)
# ifdef USE_AS_STPCPY
lea 3(%edi), %eax
@@ -532,6 +615,9 @@ L(Exit4):
.p2align 4
L(Exit5):
movl (%esi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%edi), %bnd0
+# endif
movb %dh, 4(%edi)
movl %ecx, (%edi)
# ifdef USE_AS_STPCPY
@@ -546,6 +632,9 @@ L(Exit5):
L(Exit6):
movl (%esi), %ecx
movw 4(%esi), %dx
+# ifdef __CHKP__
+ bndcu 5(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movw %dx, 4(%edi)
# ifdef USE_AS_STPCPY
@@ -560,6 +649,9 @@ L(Exit6):
L(Exit7):
movl (%esi), %ecx
movl 3(%esi), %edx
+# ifdef __CHKP__
+ bndcu 6(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movl %edx, 3(%edi)
# ifdef USE_AS_STPCPY
@@ -573,6 +665,9 @@ L(Exit7):
.p2align 4
L(Exit8):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 7(%edi), %eax
@@ -585,6 +680,9 @@ L(Exit8):
.p2align 4
L(Exit9):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 8(%edi), %bnd0
+# endif
movb %dh, 8(%edi)
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
@@ -599,6 +697,9 @@ L(Exit9):
L(Exit10):
movlpd (%esi), %xmm0
movw 8(%esi), %dx
+# ifdef __CHKP__
+ bndcu 9(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movw %dx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -613,6 +714,9 @@ L(Exit10):
L(Exit11):
movlpd (%esi), %xmm0
movl 7(%esi), %edx
+# ifdef __CHKP__
+ bndcu 10(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -627,6 +731,9 @@ L(Exit11):
L(Exit12):
movlpd (%esi), %xmm0
movl 8(%esi), %edx
+# ifdef __CHKP__
+ bndcu 11(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -641,6 +748,9 @@ L(Exit12):
L(Exit13):
movlpd (%esi), %xmm0
movlpd 5(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 12(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
@@ -655,6 +765,9 @@ L(Exit13):
L(Exit14):
movlpd (%esi), %xmm0
movlpd 6(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 13(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
@@ -669,6 +782,9 @@ L(Exit14):
L(Exit15):
movlpd (%esi), %xmm0
movlpd 7(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 14(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -682,6 +798,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 15(%edi), %eax
@@ -694,6 +813,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movb %dh, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -708,6 +830,9 @@ L(Exit17):
L(Exit18):
movdqu (%esi), %xmm0
movw 16(%esi), %cx
+# ifdef __CHKP__
+ bndcu 17(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movw %cx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -722,6 +847,9 @@ L(Exit18):
L(Exit19):
movdqu (%esi), %xmm0
movl 15(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -736,6 +864,9 @@ L(Exit19):
L(Exit20):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -750,6 +881,9 @@ L(Exit20):
L(Exit21):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
movb %dh, 20(%edi)
@@ -765,6 +899,9 @@ L(Exit21):
L(Exit22):
movdqu (%esi), %xmm0
movlpd 14(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 21(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -779,6 +916,9 @@ L(Exit22):
L(Exit23):
movdqu (%esi), %xmm0
movlpd 15(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 22(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -793,6 +933,9 @@ L(Exit23):
L(Exit24):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 23(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -807,6 +950,9 @@ L(Exit24):
L(Exit25):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 24(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movb %dh, 24(%edi)
@@ -823,6 +969,9 @@ L(Exit26):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movw 24(%esi), %cx
+# ifdef __CHKP__
+ bndcu 25(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movw %cx, 24(%edi)
@@ -839,6 +988,9 @@ L(Exit27):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 23(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 23(%edi)
@@ -855,6 +1007,9 @@ L(Exit28):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 24(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 24(%edi)
@@ -870,6 +1025,9 @@ L(Exit28):
L(Exit29):
movdqu (%esi), %xmm0
movdqu 13(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
@@ -884,6 +1042,9 @@ L(Exit29):
L(Exit30):
movdqu (%esi), %xmm0
movdqu 14(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -899,6 +1060,9 @@ L(Exit30):
L(Exit31):
movdqu (%esi), %xmm0
movdqu 15(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -913,6 +1077,9 @@ L(Exit31):
L(Exit32):
movdqu (%esi), %xmm0
movdqu 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -1612,37 +1779,90 @@ ENTRY (STRCPY)
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+# endif
cmpb $0, (%ecx)
jz L(ExitTail1)
+# ifdef __CHKP__
+ bndcu 1(%ecx), %bnd1
+# endif
cmpb $0, 1(%ecx)
jz L(ExitTail2)
+# ifdef __CHKP__
+ bndcu 2(%ecx), %bnd1
+# endif
cmpb $0, 2(%ecx)
jz L(ExitTail3)
+# ifdef __CHKP__
+ bndcu 3(%ecx), %bnd1
+# endif
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmpb $0, 4(%ecx)
jz L(ExitTail5)
+# ifdef __CHKP__
+ bndcu 5(%ecx), %bnd1
+# endif
cmpb $0, 5(%ecx)
jz L(ExitTail6)
+# ifdef __CHKP__
+ bndcu 6(%ecx), %bnd1
+# endif
cmpb $0, 6(%ecx)
jz L(ExitTail7)
+# ifdef __CHKP__
+ bndcu 7(%ecx), %bnd1
+# endif
cmpb $0, 7(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
+# ifdef __CHKP__
+ bndcu 9(%ecx), %bnd1
+# endif
cmpb $0, 9(%ecx)
jz L(ExitTail10)
+# ifdef __CHKP__
+ bndcu 10(%ecx), %bnd1
+# endif
cmpb $0, 10(%ecx)
jz L(ExitTail11)
+# ifdef __CHKP__
+ bndcu 11(%ecx), %bnd1
+# endif
cmpb $0, 11(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmpb $0, 12(%ecx)
jz L(ExitTail13)
+# ifdef __CHKP__
+ bndcu 13(%ecx), %bnd1
+# endif
cmpb $0, 13(%ecx)
jz L(ExitTail14)
+# ifdef __CHKP__
+ bndcu 14(%ecx), %bnd1
+# endif
cmpb $0, 14(%ecx)
jz L(ExitTail15)
+# ifdef __CHKP__
+ bndcu 15(%ecx), %bnd1
+# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
@@ -1654,6 +1874,9 @@ ENTRY (STRCPY)
and $-16, %ebx
pxor %xmm0, %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pcmpeqb (%ebx), %xmm0
pmovmskb %xmm0, %eax
@@ -1669,6 +1892,10 @@ ENTRY (STRCPY)
xor %ebx, %ebx
.p2align 4
+# ifdef __CHKP__
+ bndcu 16(%ecx), %bnd1
+ bndcu 15(%edx), %bnd0
+# endif
movdqa (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movdqu %xmm1, (%edx)
@@ -1678,6 +1905,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1686,6 +1917,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm4
movdqu %xmm3, (%edx, %ebx)
pcmpeqb %xmm4, %xmm0
@@ -1694,6 +1929,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm1
movdqu %xmm4, (%edx, %ebx)
pcmpeqb %xmm1, %xmm0
@@ -1702,6 +1941,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm2
movdqu %xmm1, (%edx, %ebx)
pcmpeqb %xmm2, %xmm0
@@ -1710,6 +1953,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1718,6 +1965,9 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movdqu %xmm3, (%edx, %ebx)
mov %ecx, %eax
lea 16(%ecx, %ebx), %ecx
@@ -1726,6 +1976,9 @@ ENTRY (STRCPY)
sub %eax, %edx
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
@@ -1742,6 +1995,10 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
L(Aligned64Loop_start):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu -1(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
movaps (%ecx), %xmm2
movdqa %xmm2, %xmm4
@@ -1771,6 +2028,9 @@ L(Aligned64Leave):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
@@ -1778,11 +2038,17 @@ L(Aligned64Leave):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movdqu %xmm5, -48(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movdqu %xmm6, -32(%edx)
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -1813,6 +2079,10 @@ L(CopyFrom1To16Bytes):
test $0x40, %al
jnz L(Exit7)
/* Exit 8 */
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1841,6 +2111,10 @@ L(ExitHigh):
test $0x40, %ah
jnz L(Exit15)
/* Exit 16 */
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
@@ -1854,6 +2128,10 @@ L(ExitHigh):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%ecx), %bnd1
+# endif
movb (%ecx), %al
movb %al, (%edx)
# ifdef USE_AS_STPCPY
@@ -1865,6 +2143,10 @@ L(Exit1):
.p2align 4
L(Exit2):
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1876,6 +2158,10 @@ L(Exit2):
.p2align 4
L(Exit3):
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
@@ -1889,6 +2175,10 @@ L(Exit3):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1900,6 +2190,10 @@ L(Exit4):
.p2align 4
L(Exit5):
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
@@ -1913,6 +2207,10 @@ L(Exit5):
.p2align 4
L(Exit6):
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
@@ -1926,6 +2224,10 @@ L(Exit6):
.p2align 4
L(Exit7):
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
@@ -1939,6 +2241,10 @@ L(Exit7):
.p2align 4
L(Exit9):
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+ bndcu 8(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1954,6 +2260,10 @@ L(Exit9):
.p2align 4
L(Exit10):
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+ bndcu 9(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1969,6 +2279,10 @@ L(Exit10):
.p2align 4
L(Exit11):
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+ bndcu 10(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1984,6 +2298,10 @@ L(Exit11):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1999,6 +2317,10 @@ L(Exit12):
.p2align 4
L(Exit13):
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+ bndcu 12(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
@@ -2012,6 +2334,10 @@ L(Exit13):
.p2align 4
L(Exit14):
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+ bndcu 13(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
@@ -2025,6 +2351,10 @@ L(Exit14):
.p2align 4
L(Exit15):
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+ bndcu 14(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
@@ -2040,6 +2370,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movb (%ecx), %al
movb %al, (%edx)
movl %edx, %eax
@@ -2048,6 +2381,9 @@ L(ExitTail1):
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+# endif
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
@@ -2059,6 +2395,9 @@ L(ExitTail2):
.p2align 4
L(ExitTail3):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+# endif
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
@@ -2072,6 +2411,9 @@ L(ExitTail3):
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
@@ -2083,6 +2425,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail5):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
@@ -2096,6 +2441,9 @@ L(ExitTail5):
.p2align 4
L(ExitTail6):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+# endif
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
@@ -2109,6 +2457,9 @@ L(ExitTail6):
.p2align 4
L(ExitTail7):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
@@ -2122,6 +2473,9 @@ L(ExitTail7):
.p2align 4
L(ExitTail8):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2135,6 +2489,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail9):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2150,6 +2507,9 @@ L(ExitTail9):
.p2align 4
L(ExitTail10):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2165,6 +2525,9 @@ L(ExitTail10):
.p2align 4
L(ExitTail11):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2180,6 +2543,9 @@ L(ExitTail11):
.p2align 4
L(ExitTail12):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2195,6 +2561,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
@@ -2208,6 +2577,9 @@ L(ExitTail13):
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
@@ -2221,6 +2593,9 @@ L(ExitTail14):
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
@@ -2234,6 +2609,9 @@ L(ExitTail15):
.p2align 4
L(ExitTail16):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
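[Illustrative note, not part of the patch: throughout the strcpy/stpcpy/strncpy changes above, the bounds come from bndldx at the entry point. Its memory operand is read in a special way: the base plus displacement (for example STR1(%esp)) is the address where the caller stored the pointer argument, and the index register holds the pointer value itself; the hardware uses the former to locate the bound-table entry and the latter to detect a stale entry, falling back to always-valid INIT bounds on a mismatch. On hardware without MPX the bnd* instructions execute as NOPs. A condensed prologue sketch follows; the STR1/STR2 offsets are placeholders.]

 mov STR1(%esp), %edi /* destination pointer */
 mov STR2(%esp), %esi /* source pointer */
#ifdef __CHKP__
 bndldx STR1(%esp,%edi,1), %bnd0 /* bounds the caller recorded for the dest argument */
 bndldx STR2(%esp,%esi,1), %bnd1 /* bounds recorded for the src argument */
 bndcl (%edi), %bnd0 /* both pointers must start inside their objects */
 bndcu (%edi), %bnd0
 bndcl (%esi), %bnd1
 bndcu (%esi), %bnd1
#endif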
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
index 32db65c..ab537c1 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
@@ -41,6 +41,11 @@
ENTRY ( __strlen_sse2_bsf)
ENTRANCE
mov STR(%esp), %edi
+#ifdef __CHKP__
+ bndldx STR(%esp,%edi,1), %bnd0
+ bndcl (%edi),%bnd0
+ bndcu (%edi),%bnd0
+#endif
xor %eax, %eax
mov %edi, %ecx
and $0x3f, %ecx
@@ -73,21 +78,33 @@ L(align16_start):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+#ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+#endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+#ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+#endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+#ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+#endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+#ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+#endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -98,24 +115,36 @@ L(exit):
L(exit_less16):
bsf %edx, %edx
add %edx, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit16):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $16, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit32):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $32, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit48):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $48, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
POP (%edi)
POP (%esi)
ret
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
index a4f2806..3d0743e 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -41,7 +41,10 @@
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
-# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# define RETURN \
+ mov STR+4(%esp),%edx; \
+ bndcu -1(%edx,%eax), %bnd0; \
+ POP (%edi); CFI_PUSH(%edi); ret
# endif
# ifndef STRLEN
@@ -51,12 +54,19 @@
atom_text_section
ENTRY (STRLEN)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+# endif
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
# endif
xor %eax, %eax
cmpb $0, (%edx)
@@ -122,6 +132,9 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -129,6 +142,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -136,6 +152,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -143,6 +162,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -154,24 +176,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -183,24 +217,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -212,24 +258,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -250,6 +308,9 @@ L(aligned_64_loop):
sub $64, %edi
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -535,6 +596,10 @@ L(len_less4_prolog):
add $4, %edi
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
index e026c40..1c907a4 100644
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -42,6 +42,12 @@ ENTRY (__strrchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
PUSH (%edi)
pxor %xmm2, %xmm2
mov %ecx, %edi
@@ -90,6 +96,9 @@ L(unaligned_return_value1):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -156,6 +165,9 @@ L(unaligned_return_value):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -175,6 +187,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -184,6 +199,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -193,6 +211,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -202,6 +223,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -224,6 +248,9 @@ L(return_value):
POP (%esi)
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
@@ -255,6 +282,9 @@ L(return_value_1):
bsr %eax, %eax
add %edi, %eax
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
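[Illustrative note, not part of the patch: in the searching routines (strrchr here, wcschr/wcsrchr further down) the match address that is about to be returned in %eax is itself checked, so a result computed past the object's end traps in the callee rather than escaping to the caller. Where a function has many exits this check is folded into its RETURN macro; the sketch below is modelled on the wcsrchr change, not copied from it.]

# ifdef __CHKP__
   /* Check the pointer being returned in %eax before leaving.  */
#  undef RETURN
#  define RETURN bndcu (%eax), %bnd0; POP (%edi); ret; CFI_PUSH (%edi)
# endif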
diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
index 63101d9..e06274a 100644
--- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
@@ -40,7 +40,11 @@ ENTRY (__wcschr_sse2)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
-
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
@@ -90,6 +94,9 @@ L(cross_cache):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
CFI_PUSH (%edi)
@@ -108,6 +115,9 @@ L(unaligned_no_match):
.p2align 4
L(loop):
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -117,6 +127,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -126,6 +139,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -135,6 +151,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -160,11 +179,17 @@ L(match_case2):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -176,11 +201,17 @@ L(match_higth_case2):
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -191,6 +222,9 @@ L(match_case1):
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -198,16 +232,25 @@ L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit0):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
index 9b248c1..108e7fb 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -47,6 +47,14 @@ ENTRY (__wcscmp_sse2)
*/
mov STR1(%esp), %edx
mov STR2(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
mov (%eax), %ecx
cmp %ecx, (%edx)
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
index 47fb516..708ef41 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -41,13 +41,29 @@
ENTRY (__wcscpy_ssse3)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+# endif
cmp $0, (%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmp $0, 4(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmp $0, 8(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmp $0, 12(%ecx)
jz L(ExitTail16)
@@ -61,6 +77,9 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
@@ -87,6 +106,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 16(%ecx), %bnd1
+# endif
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
@@ -97,6 +120,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -106,6 +133,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
@@ -115,6 +146,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
@@ -124,6 +159,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
@@ -133,6 +172,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -142,6 +185,9 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+# endif
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
@@ -152,6 +198,9 @@ L(Align16Both):
mov $-0x40, %esi
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
@@ -168,6 +217,9 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
@@ -182,6 +234,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
test %eax, %eax
lea 16(%esi), %esi
@@ -189,11 +244,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movaps %xmm5, -48(%edx)
test %eax, %eax
lea 16(%esi), %esi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -202,11 +263,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
@@ -218,6 +285,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -231,6 +302,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -244,6 +319,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -256,6 +335,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -269,6 +352,9 @@ L(Shl4Start):
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -290,6 +376,9 @@ L(Shl4LoopStart):
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -300,6 +389,10 @@ L(Shl4LoopStart):
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
+ movaps %xmm2, (%edx)
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
@@ -310,6 +403,9 @@ L(Shl4LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -318,6 +414,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
@@ -329,6 +428,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -342,6 +445,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -355,6 +462,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -367,6 +478,9 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -380,6 +494,9 @@ L(Shl8Start):
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -401,6 +518,9 @@ L(Shl8LoopStart):
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -410,6 +530,9 @@ L(Shl8LoopStart):
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
@@ -419,6 +542,9 @@ L(Shl8LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -427,6 +553,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
@@ -438,6 +567,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -451,6 +584,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -464,6 +601,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -476,6 +617,9 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -489,6 +633,9 @@ L(Shl12Start):
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -510,6 +657,9 @@ L(Shl12LoopStart):
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -519,6 +669,9 @@ L(Shl12LoopStart):
L(Shl12LoopExit):
movl (%ecx), %esi
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %esi, (%edx)
mov $4, %esi
@@ -533,6 +686,10 @@ L(CopyFrom1To16Bytes):
test $0x01, %al
jnz L(Exit4)
L(Exit8):
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@@ -543,6 +700,10 @@ L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
@@ -550,6 +711,10 @@ L(Exit16):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
@@ -557,6 +722,10 @@ L(Exit4):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
@@ -569,6 +738,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl %edx, %eax
ret
@@ -576,6 +748,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
@@ -583,6 +758,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
@@ -592,6 +770,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
index a92b92f..9c53149 100644
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -24,21 +24,47 @@
.text
ENTRY (__wcslen_sse2)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmp $0, (%edx)
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
cmp $0, 4(%edx)
jz L(exit_tail1)
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
cmp $0, 8(%edx)
jz L(exit_tail2)
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
cmp $0, 12(%edx)
jz L(exit_tail3)
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
cmp $0, 16(%edx)
jz L(exit_tail4)
+# ifdef __CHKP__
+ bndcu 20(%edx), %bnd0
+# endif
cmp $0, 20(%edx)
jz L(exit_tail5)
+# ifdef __CHKP__
+ bndcu 24(%edx), %bnd0
+# endif
cmp $0, 24(%edx)
jz L(exit_tail6)
+# ifdef __CHKP__
+ bndcu 28(%edx), %bnd0
+# endif
cmp $0, 28(%edx)
jz L(exit_tail7)
@@ -48,6 +74,9 @@ ENTRY (__wcslen_sse2)
lea 16(%edx), %ecx
and $-16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -55,6 +84,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -62,6 +94,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -69,6 +104,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -79,6 +117,9 @@ ENTRY (__wcslen_sse2)
.p2align 4
L(aligned_64_loop):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -129,6 +170,10 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
@@ -137,16 +182,28 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_1):
add $1, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_3):
add $3, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
index d31e48e..f7c70e6 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
@@ -36,12 +36,23 @@
# define STR1 PARMS
# define STR2 STR1+4
+# ifdef __CHKP__
+# undef RETURN
+# define RETURN bndcu (%eax),%bnd0; \
+ POP (%edi); ret; CFI_PUSH (%edi);
+# endif
+
atom_text_section
ENTRY (__wcsrchr_sse2)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %edi
punpckldq %xmm1, %xmm1
@@ -137,6 +148,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
@@ -146,6 +160,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
@@ -155,6 +172,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
@@ -164,6 +184,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
index 6ca6220..67134af 100644
--- a/sysdeps/i386/i686/strcmp.S
+++ b/sysdeps/i386/i686/strcmp.S
@@ -29,8 +29,19 @@ ENTRY (strcmp)
movl STR1(%esp), %ecx
movl STR2(%esp), %edx
-
-L(oop): movb (%ecx), %al
+#ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndldx STR2(%esp,%edx,1), %bnd1
+ bndcl (%ecx), %bnd0
+ bndcl (%edx), %bnd1
+#endif
+
+L(oop):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd0
+ bndcu (%edx), %bnd1
+#endif
+ movb (%ecx), %al
cmpb (%edx), %al
jne L(neq)
incl %ecx
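[Illustrative note, not part of the patch: the byte-at-a-time strcmp above is small enough to show the whole pattern in one piece. The self-contained sketch below (hypothetical symbol name strcmp_chkp_sketch) fetches the bounds once at entry, checks the lower bound once, and re-checks the upper bound before every load, because only the terminating NUL tells the loop where to stop.]

 .text
 .globl strcmp_chkp_sketch
 .type strcmp_chkp_sketch, @function
strcmp_chkp_sketch:
 movl 4(%esp), %ecx /* s1 */
 movl 8(%esp), %edx /* s2 */
#ifdef __CHKP__
 bndldx 4(%esp,%ecx,1), %bnd0 /* bounds the caller recorded for s1 */
 bndldx 8(%esp,%edx,1), %bnd1 /* bounds recorded for s2 */
 bndcl (%ecx), %bnd0 /* both pointers start inside their objects */
 bndcl (%edx), %bnd1
#endif
1:
#ifdef __CHKP__
 bndcu (%ecx), %bnd0 /* next byte of s1 is still below its upper bound */
 bndcu (%edx), %bnd1 /* likewise for s2 */
#endif
 movb (%ecx), %al
 cmpb (%edx), %al
 jne 2f
 incl %ecx
 incl %edx
 testb %al, %al
 jnz 1b
 xorl %eax, %eax /* equal, including the NUL */
 ret
2:
 movzbl (%edx), %edx
 movzbl %al, %eax
 subl %edx, %eax /* signed byte difference, as strcmp returns */
 ret
 .size strcmp_chkp_sketch, .-strcmp_chkp_sketch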
diff --git a/sysdeps/i386/i686/strtok.S b/sysdeps/i386/i686/strtok.S
index 8848faf..78a2ea9 100644
--- a/sysdeps/i386/i686/strtok.S
+++ b/sysdeps/i386/i686/strtok.S
@@ -121,6 +121,14 @@ ENTRY (FUNCTION)
testl %edx, %edx
jz L(returnNULL)
movl DELIM(%esp), %eax /* Get start of delimiter set. */
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1),%bnd0
+ bndldx DELIM(%esp,%eax,1),%bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%eax), %bnd1
+#endif
/* For understanding the following code remember that %ecx == 0 now.
Although all the following instruction only modify %cl we always
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
index 6799500..39fe616 100644
--- a/sysdeps/i386/memchr.S
+++ b/sysdeps/i386/memchr.S
@@ -51,6 +51,11 @@ ENTRY (__memchr)
movl LEN(%esp), %esi /* len: length of memory block. */
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* If my must not test more than three characters test
them one by one. This is especially true for 0. */
cmpl $4, %esi
@@ -72,6 +77,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -80,6 +88,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -88,6 +99,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -127,7 +141,11 @@ ENTRY (__memchr)
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -162,6 +180,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -176,6 +197,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -190,6 +214,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -213,6 +240,9 @@ L(2): subl $16, %esi
cmpl $4-16, %esi /* rest < 4 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -231,6 +261,9 @@ L(2): subl $16, %esi
cmpl $8-16, %esi /* rest < 8 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -249,6 +282,9 @@ L(2): subl $16, %esi
cmpl $12-16, %esi /* rest < 12 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -268,18 +304,27 @@ L(2): subl $16, %esi
L(3): andl $3, %esi /* mask out uninteresting bytes */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
diff --git a/sysdeps/i386/memcmp.S b/sysdeps/i386/memcmp.S
index 21e0bfc..7beab65 100644
--- a/sysdeps/i386/memcmp.S
+++ b/sysdeps/i386/memcmp.S
@@ -37,6 +37,12 @@ ENTRY (memcmp)
cfi_rel_offset (esi, 0)
movl BLK2(%esp), %edi
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%esi,1), %bnd0
+ bndldx BLK2(%esp,%edi,1), %bnd1
+ bndcl (%esi), %bnd0
+ bndcl (%edi), %bnd1
+#endif
cld /* Set direction of comparison. */
@@ -59,7 +65,13 @@ ENTRY (memcmp)
Note that the following operation does not change 0xffffffff. */
orb $1, %al /* Change 0 to 1. */
-L(1): popl %esi /* Restore registers. */
+L(1):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edi), %bnd1
+#endif
+ popl %esi /* Restore registers. */
+
cfi_adjust_cfa_offset (-4)
cfi_restore (esi)
movl %edx, %edi
diff --git a/sysdeps/i386/rawmemchr.S b/sysdeps/i386/rawmemchr.S
index 2bd20e0..27441dd 100644
--- a/sysdeps/i386/rawmemchr.S
+++ b/sysdeps/i386/rawmemchr.S
@@ -46,6 +46,11 @@ ENTRY (__rawmemchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* At the moment %edx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -62,18 +67,27 @@ ENTRY (__rawmemchr)
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -108,7 +122,11 @@ ENTRY (__rawmemchr)
/* Each round the main loop processes 16 bytes. */
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -143,6 +161,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -157,6 +178,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -171,6 +195,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -211,6 +238,9 @@ L(8): testb %cl, %cl /* test first byte in dword */
/* No further test needed we we know it is one of the four bytes. */
L(9):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
popl %edi /* pop saved register */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S
index b23e820..22d727a 100644
--- a/sysdeps/i386/stpncpy.S
+++ b/sysdeps/i386/stpncpy.S
@@ -42,6 +42,14 @@ ENTRY (__stpncpy)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 0)
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%eax,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcu -1(%eax, %ecx), %bnd0
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+#endif
subl %eax, %esi /* magic: reduce number of loop variants
to one using addressing mode */
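[Illustrative note, not part of the patch: stpncpy shows the variant for routines whose write length is known on entry. Besides the usual checks on the first byte of each argument, the last byte that may be written, dest + len - 1, is checked against the destination's upper bound right away, so the copy loop needs no further checks on its stores; only the source keeps per-read checks, since how far it is read depends on where its NUL lies. A condensed sketch with placeholder DEST/LEN offsets, assuming len > 0 as the patch does:]

 movl DEST(%esp), %eax /* destination */
 movl LEN(%esp), %ecx /* byte count n */
#ifdef __CHKP__
 bndldx DEST(%esp,%eax,1), %bnd0
 bndcl (%eax), %bnd0 /* dest start is inside the object */
 bndcu -1(%eax,%ecx), %bnd0 /* dest[n-1] is too, so every later store is covered */
#endif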
diff --git a/sysdeps/i386/strchrnul.S b/sysdeps/i386/strchrnul.S
index 7ceb88e..86bf770 100644
--- a/sysdeps/i386/strchrnul.S
+++ b/sysdeps/i386/strchrnul.S
@@ -38,6 +38,11 @@ ENTRY (__strchrnul)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
/* At the moment %edx contains CHR. What we need for the
algorithm is CHR in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -60,6 +65,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -69,6 +77,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -78,6 +89,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -120,7 +134,11 @@ ENTRY (__strchrnul)
L(1): addl $16, %eax /* adjust pointer for whole round */
-L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(11):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
movl $0xfefefeff, %edi /* magic value */
@@ -164,6 +182,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+# endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -189,6 +210,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(71) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+# endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -214,6 +238,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(72) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+# endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -268,7 +295,11 @@ L(7): testb %cl, %cl /* is first byte CHR? */
/* It must be in the fourth byte and it cannot be NUL. */
incl %eax
-L(6): popl %edi /* restore saved register content */
+L(6):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ popl %edi /* restore saved register content */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/strcspn.S b/sysdeps/i386/strcspn.S
index 0c262d6..1352b03 100644
--- a/sysdeps/i386/strcspn.S
+++ b/sysdeps/i386/strcspn.S
@@ -32,6 +32,14 @@ ENTRY (strcspn)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strpbrk.S b/sysdeps/i386/strpbrk.S
index 246ae27..7190a06 100644
--- a/sysdeps/i386/strpbrk.S
+++ b/sysdeps/i386/strpbrk.S
@@ -33,6 +33,14 @@ ENTRY (strpbrk)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strrchr.S b/sysdeps/i386/strrchr.S
index 31b8a45..858bba4 100644
--- a/sysdeps/i386/strrchr.S
+++ b/sysdeps/i386/strrchr.S
@@ -40,6 +40,10 @@ ENTRY (strrchr)
movl STR(%esp), %esi
cfi_rel_offset (esi, 0)
movl CHR(%esp), %ecx
+#ifdef __CHKP__
+ bndldx STR(%esp,%esi,1), %bnd0
+ bndcl (%esi), %bnd0
+#endif
/* At the moment %ecx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
@@ -63,6 +67,9 @@ ENTRY (strrchr)
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(11) /* target found => return */
@@ -73,6 +80,9 @@ L(11): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(12) /* target found => return */
@@ -83,6 +93,9 @@ L(12): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(13) /* target found => return */
@@ -170,7 +183,11 @@ L(51):
L(1): addl $16, %esi /* increment pointer for full round */
-L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
+L(19):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
+ movl (%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -214,6 +231,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(3) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+#endif
movl 4(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -238,6 +258,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(31) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+#endif
movl 8(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -262,6 +285,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(32) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+#endif
movl 12(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
diff --git a/sysdeps/i386/strtok.S b/sysdeps/i386/strtok.S
index 79d540b..cfee507 100644
--- a/sysdeps/i386/strtok.S
+++ b/sysdeps/i386/strtok.S
@@ -67,6 +67,11 @@ ENTRY (FUNCTION)
movl STR(%esp), %edx
movl DELIM(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx DELIM(%esp,%eax,1), %bnd1
+#endif
+
#if !defined USE_AS_STRTOK_R && defined PIC
pushl %ebx /* Save PIC register. */
cfi_adjust_cfa_offset (4)
@@ -336,6 +341,9 @@ L(11):
/* Store the pointer to the next character. */
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
@@ -351,6 +359,9 @@ L(returnNULL):
xorl %eax, %eax
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
jmp L(epilogue)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b06794c2e76de78232cd20bf331ce8913f2fd764
commit b06794c2e76de78232cd20bf331ce8913f2fd764
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Mon Aug 26 16:51:26 2013 +0400
Warning! Temporarily use -O0 for vfprintf.c if MPX is enabled, because of a compiler bug related to MPX.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 658804b..12befcc 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -77,6 +77,9 @@ $(objpfx)tst-printf.out: tst-printf.sh $(objpfx)tst-printf
endif
CFLAGS-vfprintf.c = -Wno-uninitialized
+ifeq ($(enable-mpx), yes)
+CFLAGS-vfprintf.c += -O0 -D__OPTIMIZE__
+endif
CFLAGS-vfwprintf.c = -Wno-uninitialized
CFLAGS-tst-printf.c = -Wno-format
CFLAGS-tstdiomisc.c = -Wno-format
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b18de4c90e1c54330af6d2ebf43c3cfa6e6600b6
commit b18de4c90e1c54330af6d2ebf43c3cfa6e6600b6
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Mon Sep 2 13:21:47 2013 +0400
Add --enable-mpx option to configure for Intel MPX support.
Conflicts:
elf/Makefile
diff --git a/config.make.in b/config.make.in
index 7b04568..8c1228d 100644
--- a/config.make.in
+++ b/config.make.in
@@ -96,12 +96,14 @@ build-nscd = @build_nscd@
use-nscd = @use_nscd@
build-hardcoded-path-in-tests= @hardcoded_path_in_tests@
build-pt-chown = @build_pt_chown@
+enable-mpx = @enable_mpx@
# Build tools.
CC = @CC@
CXX = @CXX@
BUILD_CC = @BUILD_CC@
CFLAGS = @CFLAGS@
+ASFLAGS = @ASFLAGS@
CPPFLAGS-config = @CPPFLAGS@
CPPUNDEFS = @CPPUNDEFS@
ASFLAGS-config = @ASFLAGS_config@
diff --git a/configure b/configure
index afe7821..7bb8bf2 100755
--- a/configure
+++ b/configure
@@ -653,6 +653,8 @@ link_obsolete_rpc
libc_cv_nss_crypt
all_warnings
force_install
+ASFLAGS
+enable_mpx
bindnow
hardcoded_path_in_tests
oldest_abi
@@ -747,6 +749,7 @@ enable_lock_elision
enable_add_ons
enable_hidden_plt
enable_bind_now
+enable_mpx
enable_static_nss
enable_force_install
enable_kernel
@@ -1409,6 +1412,7 @@ Optional Features:
for add-ons if no parameter given
--disable-hidden-plt do not hide internal function calls to avoid PLT
--enable-bind-now disable lazy relocations in DSOs
+ --enable-mpx turn on Intel MPX extension
--enable-static-nss build static NSS modules [default=no]
--disable-force-install don't force installation of files from this package,
even if they are older than the installed files
@@ -3519,6 +3523,24 @@ fi
+# Check whether --enable-mpx was given.
+if test "${enable_mpx+set}" = set; then :
+ enableval=$enable_mpx; enable_mpx=$enableval
+else
+ enable_mpx=no
+fi
+
+
+
+
+if test "$ac_test_CFLAGS" != set && test "$enable_mpx" = yes ; then
+ CFLAGS="$CFLAGS -g -fcheck-pointers -mmpx -fno-chkp-check-incomplete-type";
+fi
+
+if test "$enable_mpx" = yes ; then
+ ASFLAGS="$ASFLAGS -g -fcheck-pointers -mmpx -Wa,-madd-bnd-prefix"
+fi
+
# Check whether --enable-static-nss was given.
if test "${enable_static_nss+set}" = set; then :
enableval=$enable_static_nss; static_nss=$enableval
diff --git a/configure.in b/configure.in
index 9172ad1..d7eb9a6 100644
--- a/configure.in
+++ b/configure.in
@@ -216,6 +216,22 @@ AC_ARG_ENABLE([bind-now],
[bindnow=no])
AC_SUBST(bindnow)
+AC_ARG_ENABLE([mpx],
+ AC_HELP_STRING([--enable-mpx],
+ [turn on Intel MPX extension]),
+ [enable_mpx=$enableval],
+ [enable_mpx=no])
+
+AC_SUBST(enable_mpx)
+AC_SUBST(ASFLAGS)
+if test "$ac_test_CFLAGS" != set && test "$enable_mpx" = yes ; then
+ CFLAGS="$CFLAGS -g -fcheck-pointers -mmpx -fno-chkp-check-incomplete-type";
+fi
+
+if test "$enable_mpx" = yes ; then
+ ASFLAGS="$ASFLAGS -g -fcheck-pointers -mmpx -Wa,-madd-bnd-prefix"
+fi
+
dnl On some platforms we cannot use dynamic loading. We must provide
dnl static NSS modules.
AC_ARG_ENABLE([static-nss],
diff --git a/elf/Makefile b/elf/Makefile
index 4ef80c9..2bdf045 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -49,7 +49,10 @@ all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-iterate-phdr.c = $(uses-callbacks)
+
+ifeq ($(enable-mpx), yes)
CFLAGS-dl-init.c = -fno-check-pointers
+endif
ifeq ($(unwind-find-fde),yes)
routines += unwind-dw2-fde-glibc
diff --git a/manual/install.texi b/manual/install.texi
index 4575d22..68dab0d 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -177,6 +177,9 @@ setuid and owned by @code{root}. The use of @file{pt_chown} introduces
additional security risks to the system and you should enable it only if
you understand and accept those risks.
+@item --enable-mpx
+By default, Intel MPX extension is disabled. This option turns it on.
+
@item --build=@var{build-system}
@itemx --host=@var{host-system}
These options are for cross-compiling. If you specify both options and
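As a usage note (not part of the commit): with the hunks above applied, an MPX-instrumented build would presumably be configured along the lines of "configure --prefix=/usr --enable-mpx", which per the configure.in hunk appends -g -fcheck-pointers -mmpx -fno-chkp-check-incomplete-type to CFLAGS and -g -fcheck-pointers -mmpx -Wa,-madd-bnd-prefix to ASFLAGS. Those flag spellings are specific to the MPX-enabled compiler branch this work targets and may differ in released GCC.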
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1002f6c2a45ba235dd53d7e02ee9ad24cd5743b8
commit 1002f6c2a45ba235dd53d7e02ee9ad24cd5743b8
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sun May 19 18:30:05 2013 +0400
Support the new siginfo fields in glibc for Intel MPX.
diff --git a/sysdeps/unix/sysv/linux/x86/bits/siginfo.h b/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
index bfc6aa3..23d946c 100644
--- a/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
+++ b/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
@@ -108,6 +108,10 @@ typedef struct
{
void *si_addr; /* Faulting insn/memory ref. */
short int si_addr_lsb; /* Valid LSB of the reported address. */
+# ifdef __CHKP__
+ void *si_lower;
+ void *si_upper;
+# endif
} _sigfault;
/* SIGPOLL. */
@@ -141,6 +145,10 @@ typedef struct
# define si_ptr _sifields._rt.si_sigval.sival_ptr
# define si_addr _sifields._sigfault.si_addr
# define si_addr_lsb _sifields._sigfault.si_addr_lsb
+# ifdef __CHKP__
+# define si_lower _sifields._sigfault.si_lower
+# define si_upper _sifields._sigfault.si_upper
+# endif
# define si_band _sifields._sigpoll.si_band
# define si_fd _sifields._sigpoll.si_fd
# define si_call_addr _sifields._sigsys._call_addr
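A minimal sketch of how an application might consume the new fields, assuming a compiler that defines __CHKP__ and the siginfo layout added above (si_lower and si_upper are only visible under that macro); the handler and function names are illustrative only:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifdef __CHKP__
static void
bound_violation_handler (int sig, siginfo_t *info, void *ctx)
{
  /* si_lower/si_upper describe the bounds the faulting access violated.  */
  fprintf (stderr, "bound violation at %p, bounds [%p, %p]\n",
           info->si_addr, info->si_lower, info->si_upper);
  _exit (1);
}

static void
install_bound_handler (void)
{
  struct sigaction sa;
  memset (&sa, 0, sizeof sa);
  sa.sa_sigaction = bound_violation_handler;
  sa.sa_flags = SA_SIGINFO;
  sigemptyset (&sa.sa_mask);
  /* MPX delivers bound-range faults as SIGSEGV.  */
  sigaction (SIGSEGV, &sa, NULL);
}
#endif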
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9096323b8cb7bfe15b5f7137dc24f92f5ba6dd17
commit 9096323b8cb7bfe15b5f7137dc24f92f5ba6dd17
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Jan 25 18:40:50 2013 +0400
Intel MPX support for x86_64 and x86_32 pthread routines.
Always use INIT bounds in __tls_get_addr.
Set bounds manually in _Unwind_Resume.
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 576d9a1..ee84fa6 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -767,6 +767,9 @@ update_get_addr (GET_ADDR_ARGS)
void *
__tls_get_addr (GET_ADDR_ARGS)
{
+#ifdef __CHKP__
+ GET_ADDR_PARAM = __bnd_init_ptr_bounds(GET_ADDR_PARAM);
+#endif
dtv_t *dtv = THREAD_DTV ();
if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index a6d6bc4..973ff0e 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -94,6 +94,13 @@ __pthread_cond_timedwait:
je .Lreltmo
#endif
+#ifdef __CHKP__
+ bndldx (%esp,%ebx,1), %bnd0
+ bndldx 28(%esp,%ebp,1), %bnd2
+ bndmov %bnd0, 48(%esp)
+ bndmov %bnd2, 80(%esp)
+#endif
+
/* Get internal lock. */
movl $1, %edx
xorl %eax, %eax
@@ -109,12 +116,24 @@ __pthread_cond_timedwait:
different value in there this is a bad user bug. */
2: cmpl $-1, dep_mutex(%ebx)
movl 24(%esp), %eax
+#ifdef __CHKP__
+ bndldx 4(%esp,%eax,1), %bnd1
+ bndmov %bnd1, 64(%esp)
+#endif
je 17f
movl %eax, dep_mutex(%ebx)
/* Unlock the mutex. */
17: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
testl %eax, %eax
jne 16f
@@ -296,9 +315,25 @@ __pthread_cond_timedwait:
should always succeed or else the kernel did not lock the mutex
correctly. */
movl dep_mutex(%ebx), %eax
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
jmp 8b
28: addl $1, wakeup_seq(%ebx)
@@ -356,8 +391,15 @@ __pthread_cond_timedwait:
movl 16(%esp), %ecx
testl %ecx, %ecx
jnz 27f
-
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
26: addl $FRAME_SIZE, %esp
cfi_adjust_cfa_offset(-FRAME_SIZE)
@@ -388,7 +430,16 @@ __pthread_cond_timedwait:
cfi_restore_state
-27: call __pthread_mutex_cond_lock_adjust
+27:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
xorl %eax, %eax
jmp 26b
@@ -529,7 +580,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
117: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
testl %eax, %eax
jne 16b
@@ -899,10 +958,27 @@ __condvar_tw_cleanup:
cmpl %ebx, %gs:TID
jne 8f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
jmp 9f
-8: call __pthread_mutex_cond_lock
+8:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
9: movl %esi, (%esp)
.LcallUR:
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index 9695dcb..af53cbf 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -60,6 +60,10 @@ __pthread_cond_wait:
xorl %esi, %esi
movl 20(%esp), %ebx
+#ifdef __CHKP__
+ bndldx (%esp,%ebx,1), %bnd0
+ bndmov %bnd0, 32(%esp)
+#endif
LIBC_PROBE (cond_wait, 2, 24(%esp), %ebx)
@@ -78,12 +82,23 @@ __pthread_cond_wait:
different value in there this is a bad user bug. */
2: cmpl $-1, dep_mutex(%ebx)
movl 24(%esp), %eax
+#ifdef __CHKP__
+ bndldx 4(%esp,%eax,1), %bnd1
+ bndmov %bnd1, 48(%esp)
+#endif
je 15f
movl %eax, dep_mutex(%ebx)
/* Unlock the mutex. */
15: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
testl %eax, %eax
jne 12f
@@ -270,7 +285,14 @@ __pthread_cond_wait:
testl %ecx, %ecx
jnz 21f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
20: addl $FRAME_SIZE, %esp
cfi_adjust_cfa_offset(-FRAME_SIZE);
@@ -292,7 +314,15 @@ __pthread_cond_wait:
cfi_restore_state
-21: call __pthread_mutex_cond_lock_adjust
+21:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
xorl %eax, %eax
jmp 20b
@@ -308,9 +338,23 @@ __pthread_cond_wait:
should always succeed or else the kernel did not lock the mutex
correctly. */
movl dep_mutex(%ebx), %eax
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
jmp 8b
/* Initial locking failed. */
@@ -581,10 +625,25 @@ __condvar_w_cleanup:
cmpl %ebx, %gs:TID
jne 8f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
jmp 9f
-8: call __pthread_mutex_cond_lock
+8:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
9: movl %esi, (%esp)
.LcallUR:
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
index b405b9e..7104fba 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
@@ -114,6 +114,9 @@ __pthread_once:
jne 7f
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET(__pthread_register_cancel)
/* Call the user-provided initialization function. */
@@ -121,6 +124,9 @@ __pthread_once:
/* Pop the cleanup handler. */
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET(__pthread_unregister_cancel)
addl $UNWINDBUFSIZE+8, %esp
cfi_adjust_cfa_offset (-UNWINDBUFSIZE-8)
@@ -168,6 +174,9 @@ __pthread_once:
ENTER_KERNEL
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET (__pthread_unwind_next)
/* NOTREACHED */
hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 6c1a75f..dc15345 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -99,6 +99,12 @@ __pthread_cond_timedwait:
movq %rsi, 16(%rsp)
movq %rdx, %r13
+#ifdef __CHKP__
+ bndmov %bnd0, 72(%rsp)
+ bndmov %bnd1, 88(%rsp)
+ bndmov %bnd2, 104(%rsp)
+#endif
+
je 22f
mov %RSI_LP, dep_mutex(%rdi)
@@ -128,7 +134,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
32: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
testl %eax, %eax
jne 46f
@@ -338,7 +352,15 @@ __pthread_cond_timedwait:
testb %r15b, %r15b
jnz 64f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
63: testq %rax, %rax
cmoveq %r14, %rax
@@ -362,7 +384,16 @@ __pthread_cond_timedwait:
cfi_restore_state
-64: callq __pthread_mutex_cond_lock_adjust
+64:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
movq %r14, %rax
jmp 48b
@@ -457,7 +488,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
testl %eax, %eax
jne 46b
@@ -786,7 +825,15 @@ __condvar_cleanup2:
cmpl %eax, %fs:TID
jne 7f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
jmp 8f
7: callq __pthread_mutex_cond_lock
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index f0f6683..32b8d69 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -74,6 +74,11 @@ __pthread_cond_wait:
movq %rdi, 8(%rsp)
movq %rsi, 16(%rsp)
+#ifdef __CHKP__
+ bndmov %bnd0, 32(%rsp)
+ bndmov %bnd1, 48(%rsp)
+#endif
+
je 15f
mov %RSI_LP, dep_mutex(%rdi)
@@ -91,7 +96,14 @@ __pthread_cond_wait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
testl %eax, %eax
jne 12f
@@ -256,7 +268,14 @@ __pthread_cond_wait:
testb %r8b, %r8b
jnz 18f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
14: leaq FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
@@ -266,7 +285,15 @@ __pthread_cond_wait:
cfi_adjust_cfa_offset(FRAME_SIZE)
-18: callq __pthread_mutex_cond_lock_adjust
+18:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
xorl %eax, %eax
jmp 14b
@@ -510,10 +537,16 @@ __condvar_cleanup1:
cmpl %eax, %fs:TID
jne 7f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
jmp 8f
-
7: callq __pthread_mutex_cond_lock
8: movq 24(%rsp), %rdi
diff --git a/sysdeps/gnu/unwind-resume.c b/sysdeps/gnu/unwind-resume.c
index df845cd..19e06b2 100644
--- a/sysdeps/gnu/unwind-resume.c
+++ b/sysdeps/gnu/unwind-resume.c
@@ -46,6 +46,9 @@ init (void)
void
_Unwind_Resume (struct _Unwind_Exception *exc)
{
+#ifdef __CHKP__
+ exc = (struct _Unwind_Exception *) __bnd_set_ptr_bounds (exc, sizeof (struct _Unwind_Exception));
+#endif
if (__builtin_expect (libgcc_s_resume == NULL, 0))
init ();
libgcc_s_resume (exc);
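The two intrinsics used in this commit follow a recurring pattern in the branch: __bnd_init_ptr_bounds() widens a pointer to INIT (whole address space) bounds when the real object size cannot be tracked, and __bnd_set_ptr_bounds() attaches explicit bounds. A hedged sketch, assuming the intrinsic names are the ones provided by the MPX-enabled compiler this branch targets (they typically correspond to GCC's __builtin___bnd_* built-ins); the function names here are illustrative:

#include <stddef.h>

/* Widen P to INIT bounds when the real object size cannot be tracked,
   e.g. the TLS block returned by __tls_get_addr.  */
static void *
widen_to_init_bounds (void *p)
{
#ifdef __CHKP__
  p = __bnd_init_ptr_bounds (p);
#endif
  return p;
}

/* Re-attach exact bounds [OBJ, OBJ + SIZE), e.g. for the exception object
   handed to _Unwind_Resume.  */
static void *
set_exact_bounds (void *obj, size_t size)
{
#ifdef __CHKP__
  obj = __bnd_set_ptr_bounds (obj, size);
#endif
  return obj;
}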
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b98cec04223e2dc8191af5b1ee85d0f49a9eca51
commit b98cec04223e2dc8191af5b1ee85d0f49a9eca51
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 16:33:47 2013 +0400
Buffer overrun detected by Intel MPX in wcschr test. Fixed.
diff --git a/string/test-strchr.c b/string/test-strchr.c
index cbcf53e..572671f 100644
--- a/string/test-strchr.c
+++ b/string/test-strchr.c
@@ -219,9 +219,14 @@ do_random_tests (void)
static void
check1 (void)
{
- char s[] __attribute__((aligned(16))) = "\xff";
- char c = '\xfe';
- char *exp_result = stupid_STRCHR (s, c);
+ CHAR s[] __attribute__((aligned(16))) =
+#ifdef WIDE
+ L"\xff";
+#else
+ "\xff";
+#endif
+ CHAR c = '\xfe';
+ CHAR *exp_result = stupid_STRCHR (s, c);
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, c, exp_result);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b825d28a47d31d525fa1042257a88a705545268a
commit b825d28a47d31d525fa1042257a88a705545268a
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sat Dec 22 20:51:45 2012 +0400
[BZ #15698] Buffer overrun detected by Intel MPX in sysdeps/unix/sysv/linux/ifaddrs.c
diff --git a/sysdeps/unix/sysv/linux/ifaddrs.c b/sysdeps/unix/sysv/linux/ifaddrs.c
index 89fda15..4f5f7b5 100644
--- a/sysdeps/unix/sysv/linux/ifaddrs.c
+++ b/sysdeps/unix/sysv/linux/ifaddrs.c
@@ -782,9 +782,11 @@ getifaddrs_internal (struct ifaddrs **ifap)
for (i = 0; i < (preflen / 8); i++)
*cp++ = 0xff;
- c = 0xff;
- c <<= (8 - (preflen % 8));
- *cp = c;
+ if (preflen < max_prefixlen) {
+ c = 0xff;
+ c <<= (8 - (preflen % 8));
+ *cp = c;
+ }
}
}
}
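The overrun fixed here occurs when preflen is a whole multiple of 8 and equal to the maximum prefix length: the loop has already filled every byte of the mask, cp points one past the buffer, and the unconditional partial-byte store writes out of bounds. A standalone sketch of the corrected logic (function name, buffer handling and the explicit memset are illustrative, not taken from the source):

#include <string.h>

/* Build a netmask of PREFLEN leading one-bits into MASK, which holds
   MAX_PREFIXLEN / 8 bytes.  */
static void
make_netmask (unsigned char *mask, unsigned int preflen,
              unsigned int max_prefixlen)
{
  unsigned char *cp = mask;
  unsigned int i;

  memset (mask, 0, max_prefixlen / 8);
  for (i = 0; i < preflen / 8; i++)
    *cp++ = 0xff;

  /* Only touch the partial byte if one exists; when
     preflen == max_prefixlen, cp already points past the buffer.  */
  if (preflen < max_prefixlen)
    *cp = (unsigned char) (0xff << (8 - (preflen % 8)));
}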
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=980b18db54bf573d5352cd79c0a1985f59b171e5
commit 980b18db54bf573d5352cd79c0a1985f59b171e5
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 18:56:40 2012 +0400
Buffer overrun detected by Intel MPX in stdio-common/scanf13.c. Fixed.
diff --git a/stdio-common/scanf13.c b/stdio-common/scanf13.c
index 720224a..aa58dd5 100644
--- a/stdio-common/scanf13.c
+++ b/stdio-common/scanf13.c
@@ -59,6 +59,7 @@ main (void)
}
memset (buf, '/', sizeof (buf));
+ buf[sizeof(buf) - 1] = 0;
buf[0] = '\t';
buf[1] = ' ';
buf[2] = 0xc3;
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3b564b91f33e3ca230eec121514ee505d0d54437
commit 3b564b91f33e3ca230eec121514ee505d0d54437
Author: ienkovic <ilya.enkovich@intel.com>
Date: Tue Dec 25 15:16:28 2012 +0400
Do not block the SIGSEGV signal because the Intel MPX runtime uses it.
diff --git a/nptl/sysdeps/pthread/gai_misc.h b/nptl/sysdeps/pthread/gai_misc.h
index 6026085..46305ca 100644
--- a/nptl/sysdeps/pthread/gai_misc.h
+++ b/nptl/sysdeps/pthread/gai_misc.h
@@ -82,6 +82,9 @@ __gai_start_notify_thread (void)
sigset_t ss;
sigemptyset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, NULL, _NSIG / 8);
}
@@ -106,6 +109,9 @@ __gai_create_helper_thread (pthread_t *threadp, void *(*tf) (void *),
sigset_t oss;
sigfillset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
int ret = pthread_create (threadp, &attr, tf, arg);
diff --git a/nptl/sysdeps/unix/sysv/linux/aio_misc.h b/nptl/sysdeps/unix/sysv/linux/aio_misc.h
index 2649dc1..3994f98 100644
--- a/nptl/sysdeps/unix/sysv/linux/aio_misc.h
+++ b/nptl/sysdeps/unix/sysv/linux/aio_misc.h
@@ -32,6 +32,9 @@ __aio_start_notify_thread (void)
sigset_t ss;
sigemptyset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, NULL, _NSIG / 8);
}
@@ -54,6 +57,9 @@ __aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *),
sigset_t oss;
sigfillset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
int ret = pthread_create (threadp, &attr, tf, arg);
diff --git a/nptl/sysdeps/unix/sysv/linux/mq_notify.c b/nptl/sysdeps/unix/sysv/linux/mq_notify.c
index 6bc34ba..b9250df 100644
--- a/nptl/sysdeps/unix/sysv/linux/mq_notify.c
+++ b/nptl/sysdeps/unix/sysv/linux/mq_notify.c
@@ -78,6 +78,9 @@ change_sigmask (int how, sigset_t *oss)
{
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset (&ss, SIGSEGV);
+#endif
return pthread_sigmask (how, &ss, oss);
}
diff --git a/nptl/sysdeps/unix/sysv/linux/timer_routines.c b/nptl/sysdeps/unix/sysv/linux/timer_routines.c
index 57f115f..1979adc 100644
--- a/nptl/sysdeps/unix/sysv/linux/timer_routines.c
+++ b/nptl/sysdeps/unix/sysv/linux/timer_routines.c
@@ -174,6 +174,9 @@ __start_helper_thread (void)
sigset_t oss;
sigfillset (&ss);
__sigaddset (&ss, SIGCANCEL);
+#ifdef __CHKP__
+ __sigdelset (&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL_DECL (err);
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
diff --git a/nptl/tst-cancel7.c b/nptl/tst-cancel7.c
index ad40b9c..7e8a860 100644
--- a/nptl/tst-cancel7.c
+++ b/nptl/tst-cancel7.c
@@ -65,6 +65,9 @@ sl (void)
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset (&ss, SIGSEGV);
+#endif
sigsuspend (&ss);
exit (0);
}
diff --git a/nptl/tst-signal1.c b/nptl/tst-signal1.c
index 81dd161..0345701 100644
--- a/nptl/tst-signal1.c
+++ b/nptl/tst-signal1.c
@@ -68,6 +68,9 @@ receiver (void)
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
if (pthread_sigmask (SIG_SETMASK, &ss, NULL) != 0)
{
puts ("1st pthread_sigmask failed");
diff --git a/nptl/tst-signal2.c b/nptl/tst-signal2.c
index 87f3bb8..23cda43 100644
--- a/nptl/tst-signal2.c
+++ b/nptl/tst-signal2.c
@@ -71,6 +71,9 @@ receiver (void)
alarm (10);
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
if (pthread_sigmask (SIG_SETMASK, &ss, NULL) != 0)
{
diff --git a/nptl/tst-signal3.c b/nptl/tst-signal3.c
index fc34f66..ae5fea6 100644
--- a/nptl/tst-signal3.c
+++ b/nptl/tst-signal3.c
@@ -96,6 +96,9 @@ do_test (void)
/* Block all signals. */
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
th_main = pthread_self ();
@@ -118,6 +121,9 @@ do_test (void)
};
sigfillset (&sa.sa_mask);
+#ifdef __CHKP__
+ sigdelset(&sa.sa_mask, SIGSEGV);
+#endif
if (sigaction (sig0 + i, &sa, NULL) != 0)
{
printf ("sigaction for signal %d failed\n", i);
diff --git a/sysdeps/posix/profil.c b/sysdeps/posix/profil.c
index 86d36a9..28613af 100644
--- a/sysdeps/posix/profil.c
+++ b/sysdeps/posix/profil.c
@@ -106,6 +106,9 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
act.sa_handler = (sighandler_t) &profil_counter;
act.sa_flags = SA_RESTART;
__sigfillset (&act.sa_mask);
+#ifdef __CHKP__
+ __sigdelset (&act.sa_mask, SIGSEGV);
+#endif
if (__sigaction (SIGPROF, &act, oact_ptr) < 0)
return -1;
diff --git a/sysdeps/posix/sigwait.c b/sysdeps/posix/sigwait.c
index b0ea14d..a980647 100644
--- a/sysdeps/posix/sigwait.c
+++ b/sysdeps/posix/sigwait.c
@@ -42,11 +42,17 @@ do_sigwait (const sigset_t *set, int *sig)
/* Prepare set. */
__sigfillset (&tmp_mask);
+#ifdef __CHKP__
+ __sigdelset (&tmp_mask, SIGSEGV);
+#endif
/* Unblock all signals in the SET and register our nice handler. */
action.sa_handler = ignore_signal;
action.sa_flags = 0;
__sigfillset (&action.sa_mask); /* Block all signals for handler. */
+#ifdef __CHKP__
+ __sigdelset (&action.sa_mask, SIGSEGV);
+#endif
/* Make sure we recognize error conditions by setting WAS_SIG to a
value which does not describe a legal signal number. */
diff --git a/sysdeps/posix/sprofil.c b/sysdeps/posix/sprofil.c
index 1447a4f..42c43cd 100644
--- a/sysdeps/posix/sprofil.c
+++ b/sysdeps/posix/sprofil.c
@@ -339,6 +339,9 @@ __sprofil (struct prof *profp, int profcnt, struct timeval *tvp,
act.sa_handler = (sighandler_t) &profil_counter_ushort;
act.sa_flags = SA_RESTART;
__sigfillset (&act.sa_mask);
+#ifdef __CHKP__
+ __sigdelset (&act.sa_mask, SIGSEGV);
+#endif
if (__sigaction (SIGPROF, &act, &prof_info.saved_action) < 0)
return -1;
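The pattern applied throughout this commit is the same everywhere: wherever a helper thread or signal handler blocks "all" signals, SIGSEGV has to be carved back out so the MPX runtime can still receive bound-violation reports. A minimal, self-contained sketch of that pattern (the helper thread and function names are illustrative):

#include <pthread.h>
#include <signal.h>

static void *
helper (void *arg)
{
  /* ... helper thread body ... */
  return arg;
}

int
start_helper (pthread_t *thr)
{
  sigset_t ss, oss;

  sigfillset (&ss);
#ifdef __CHKP__
  /* Keep SIGSEGV deliverable: the MPX runtime reports #BR through it.  */
  sigdelset (&ss, SIGSEGV);
#endif
  pthread_sigmask (SIG_SETMASK, &ss, &oss);
  int ret = pthread_create (thr, NULL, helper, NULL);
  pthread_sigmask (SIG_SETMASK, &oss, NULL);
  return ret;
}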
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c4f6e8dc89935bbd7b3170b20590ab94a62c7cc2
commit c4f6e8dc89935bbd7b3170b20590ab94a62c7cc2
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 17:08:14 2013 +0400
Inappropriate code style for Intel MPX in string/strcpy.c and wcsmbs/wcscpy.c.
Fix the code when MPX is enabled.
diff --git a/string/strcpy.c b/string/strcpy.c
index b71f753..04278ec 100644
--- a/string/strcpy.c
+++ b/string/strcpy.c
@@ -26,6 +26,7 @@ char *
strcpy (dest, src)
char *dest;
const char *src;
+#ifndef __CHKP__
{
char c;
char *s = (char *) src;
@@ -40,4 +41,12 @@ strcpy (dest, src)
return dest;
}
+#else
+{
+ const char *ret = dest;
+ while ((*dest++ = *src++) != '\0');
+ return ret;
+}
+#endif
+
libc_hidden_builtin_def (strcpy)
diff --git a/wcsmbs/wcscpy.c b/wcsmbs/wcscpy.c
index 3b1e0c6..3113cf5 100644
--- a/wcsmbs/wcscpy.c
+++ b/wcsmbs/wcscpy.c
@@ -25,6 +25,7 @@ wchar_t *
wcscpy (dest, src)
wchar_t *dest;
const wchar_t *src;
+#ifndef __CHKP__
{
wint_t c;
wchar_t *wcp;
@@ -56,3 +57,11 @@ wcscpy (dest, src)
return dest;
}
+#else
+{
+ const wchar_t *ret = dest;
+ while ((*dest++ = *src++) != L'\0');
+ return ret;
+
+}
+#endif
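What makes the pre-existing style "inappropriate" for MPX is only partly visible in the hunks: the historical implementation walks the source string and stores through a pointer offset by (dest - src), so every store goes through a pointer that carries the source buffer's bounds and is flagged as out of range. The replacement keeps each pointer inside its own object. A schematic sketch of the distinction (names are illustrative):

#include <stddef.h>

/* MPX-unfriendly idiom (schematic): index one object through a pointer
   derived from another, e.g.
       ptrdiff_t off = dest - src;
       ...
       src_cursor[off] = c;    -- a store into DEST checked against SRC's bounds
   Under bounds checking every such store looks like a violation.  */

char *
copy_mpx_friendly (char *dest, const char *src)
{
  char *ret = dest;
  while ((*dest++ = *src++) != '\0')
    ;
  return ret;
}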
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=26ad8ed923d04dc3dc107ba3d92cb369cb5f7d74
commit 26ad8ed923d04dc3dc107ba3d92cb369cb5f7d74
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 19:25:35 2013 +0400
Inappropriate code style for Intel MPX in debug/wcscpy_chk.c. Fix the code if MPX is enabled.
diff --git a/debug/wcscpy_chk.c b/debug/wcscpy_chk.c
index 61092c3..3e6d185 100644
--- a/debug/wcscpy_chk.c
+++ b/debug/wcscpy_chk.c
@@ -23,6 +23,7 @@
/* Copy SRC to DEST. */
wchar_t *
__wcscpy_chk (wchar_t *dest, const wchar_t *src, size_t n)
+#ifndef __CHKP__
{
wint_t c;
wchar_t *wcp;
@@ -58,3 +59,22 @@ __wcscpy_chk (wchar_t *dest, const wchar_t *src, size_t n)
return dest;
}
+#else
+{
+ const wchar_t *result = dest;
+ dest--;
+ wint_t c;
+
+ do
+ {
+ if (__builtin_expect (n-- == 0, 0))
+ __chk_fail ();
+ c = src[0];
+ *++dest = c;
+ ++src;
+ }
+ while (c != L'\0');
+
+ return result;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2e0268156ce3896e0f26a7d624c49e408f55c862
commit 2e0268156ce3896e0f26a7d624c49e408f55c862
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Mar 11 17:06:38 2013 +0400
Inappropriate code style for Intel MPX in debug/wcpcpy_chk.c. Fix the code when MPX is enabled.
diff --git a/debug/wcpcpy_chk.c b/debug/wcpcpy_chk.c
index 7c836e6..d90f293 100644
--- a/debug/wcpcpy_chk.c
+++ b/debug/wcpcpy_chk.c
@@ -26,6 +26,7 @@
DEST. Check for overflows. */
wchar_t *
__wcpcpy_chk (wchar_t *dest, const wchar_t *src, size_t destlen)
+#ifndef __CHKP__
{
wchar_t *wcp = (wchar_t *) dest - 1;
wint_t c;
@@ -42,3 +43,21 @@ __wcpcpy_chk (wchar_t *dest, const wchar_t *src, size_t destlen)
return wcp;
}
+#else
+{
+ dest--;
+ wint_t c;
+
+ do
+ {
+ if (__builtin_expect (destlen-- == 0, 0))
+ __chk_fail ();
+ c = src[0];
+ *++dest = c;
+ ++src;
+ }
+ while (c != L'\0');
+
+ return dest;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=76e7a112597a0de1ba0ee505311f8fe10654b041
commit 76e7a112597a0de1ba0ee505311f8fe10654b041
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon May 27 18:54:53 2013 +0400
Inappropriate code style for Intel MPX at wcsmbs/wcpcpy.c. Use a different implementation when MPX is enabled.
diff --git a/wcsmbs/wcpcpy.c b/wcsmbs/wcpcpy.c
index 6f952b4..4c541b1 100644
--- a/wcsmbs/wcpcpy.c
+++ b/wcsmbs/wcpcpy.c
@@ -18,8 +18,9 @@
#include <wchar.h>
-#define __need_ptrdiff_t
-#include <stddef.h>
+#ifndef __CHKP__
+# define __need_ptrdiff_t
+# include <stddef.h>
/* Copy SRC to DEST, returning the address of the terminating L'\0' in
@@ -42,5 +43,14 @@ __wcpcpy (dest, src)
return wcp;
}
+#else
+wchar_t *
+__wcpcpy (wchar_t *dst, const wchar_t *src)
+{
+ while ((*dst++ = *src++) != L'\0');
+ return dst - 1;
+}
+
+#endif
weak_alias (__wcpcpy, wcpcpy)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d77ddb29368d694593f1b2a3347a9c5d64c07eb9
commit d77ddb29368d694593f1b2a3347a9c5d64c07eb9
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Dec 20 18:46:38 2012 +0400
Inappropriate code style for Intel MPX at posix/fnmatch_loop.c. Fixed.
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 078b982..802eb18 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -313,7 +313,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
/* Invalid character class name. */
return FNM_NOMATCH;
-# if defined _LIBC && ! WIDE_CHAR_VERSION
+# if defined _LIBC && ! WIDE_CHAR_VERSION && !defined __CHKP__
/* The following code is glibc specific but does
there a good job in speeding up the code since
we can avoid the btowc() call. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=785be3d06b090e255e108239dcc3dc9a85056353
commit 785be3d06b090e255e108239dcc3dc9a85056353
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Dec 20 18:23:10 2012 +0400
Inappropriate code style for Intel MPX at argp/argp-help.c. Fixed.
diff --git a/argp/argp-help.c b/argp/argp-help.c
index ace71b4..8054785 100644
--- a/argp/argp-help.c
+++ b/argp/argp-help.c
@@ -867,7 +867,10 @@ hol_append (struct hol *hol, struct hol *more)
/* Fix up the short options pointers from HOL. */
for (e = entries, left = hol->num_entries; left > 0; e++, left--)
- e->short_options += (short_options - hol->short_options);
+ {
+ unsigned long offset = e->short_options - hol->short_options;
+ e->short_options = (char *)(short_options + offset);
+ }
/* Now add the short options from MORE, fixing up its entries
too. */
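The hunk above replaces "e->short_options += (short_options - hol->short_options)" with an explicit offset computation. Under MPX, pointer arithmetic inherits the bounds of the pointer operand, so the original form leaves the entry pointing into the old buffer's bounds; computing a plain integer offset and adding it to the new base makes the result carry the new buffer's bounds. A schematic sketch of that rebase pattern (names are illustrative):

#include <stddef.h>

/* Re-point ENTRY_PTR, which addressed somewhere inside OLD_BASE, at the
   corresponding position inside NEW_BASE.  */
static char *
rebase (char *entry_ptr, const char *old_base, char *new_base)
{
  /* Bounds-friendly: the distance is a plain integer, and the result is
     derived from NEW_BASE, so it carries NEW_BASE's bounds.
     The bounds-hostile equivalent, entry_ptr += new_base - old_base,
     would keep ENTRY_PTR's old bounds.  */
  size_t offset = (size_t) (entry_ptr - old_base);
  return new_base + offset;
}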
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=56e443131aca7b05c0220c195cd7500a5f8a7803
commit 56e443131aca7b05c0220c195cd7500a5f8a7803
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 17:03:44 2012 +0400
Inappropriate code style for Intel MPX. Expand bounds in crypt/crypt.c
diff --git a/crypt/crypt.c b/crypt/crypt.c
index e429950..96ec2eb 100644
--- a/crypt/crypt.c
+++ b/crypt/crypt.c
@@ -43,7 +43,13 @@ _ufc_doit_r(itr, __data, res)
int i;
long32 s, *k;
long32 *sb01 = (long32*)__data->sb0;
+#ifdef __CHKP__
+ sb01 = __bnd_set_ptr_bounds (sb01, sizeof(__data->sb0) + sizeof(__data->sb1));
+#endif
long32 *sb23 = (long32*)__data->sb2;
+#ifdef __CHKP__
+ sb23 = __bnd_set_ptr_bounds (sb23, sizeof(__data->sb2) + sizeof(__data->sb3));
+#endif
long32 l1, l2, r1, r2;
l1 = (long32)res[0]; l2 = (long32)res[1];
@@ -89,7 +95,13 @@ _ufc_doit_r(itr, __data, res)
int i;
long64 l, r, s, *k;
long64 *sb01 = (long64*)__data->sb0;
+#ifdef __CHKP__
+ sb01 = __bnd_set_ptr_bounds (sb01, sizeof(__data->sb0) + sizeof(__data->sb1));
+#endif
long64 *sb23 = (long64*)__data->sb2;
+#ifdef __CHKP__
+ sb23 = __bnd_set_ptr_bounds (sb23, sizeof(__data->sb2) + sizeof(__data->sb3));
+#endif
l = (((long64)res[0]) << 32) | ((long64)res[1]);
r = (((long64)res[2]) << 32) | ((long64)res[3]);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ebf5b900bbee969751db9d132024acecd28fbf62
commit ebf5b900bbee969751db9d132024acecd28fbf62
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 14:55:21 2012 +0400
Inappropriate code style for Intel MPX in libio/fileops.c.
Use INIT (maximum) bounds as it is hard to rewrite the algorithm.
diff --git a/libio/fileops.c b/libio/fileops.c
index e92f85b..a17504b 100644
--- a/libio/fileops.c
+++ b/libio/fileops.c
@@ -758,6 +758,9 @@ decide_maybe_mmap (_IO_FILE *fp)
void *p;
p = __mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED, fp->_fileno, 0);
+#ifdef __CHKP__
+ p = __bnd_init_ptr_bounds(p);
+#endif
if (p != MAP_FAILED)
{
/* OK, we managed to map the file. Set the buffer up and use a
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=af1d2d1ffb534abeadb2a82365f0b6ef6fc96e3a
commit af1d2d1ffb534abeadb2a82365f0b6ef6fc96e3a
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Nov 8 16:35:39 2012 +0400
Inappropriate code style for Intel MPX in elf/dl-close.c
The cast implies a memory access with a bounds violation.
Allow that.
diff --git a/elf/dl-close.c b/elf/dl-close.c
index fe3014c..15775ec 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -347,6 +347,10 @@ _dl_close_worker (struct link_map *map)
struct link_map *tmap = (struct link_map *)
((char *) imap->l_scope[cnt]
- offsetof (struct link_map, l_searchlist));
+#ifdef __CHKP__
+ tmap = __bnd_set_ptr_bounds(tmap, sizeof(struct link_map));
+#endif
+
assert (tmap->l_ns == nsid);
if (tmap->l_idx == IDX_STILL_USED)
++remain;
@@ -393,6 +397,9 @@ _dl_close_worker (struct link_map *map)
struct link_map *tmap = (struct link_map *)
((char *) imap->l_scope[cnt]
- offsetof (struct link_map, l_searchlist));
+#ifdef __CHKP__
+ tmap = __bnd_set_ptr_bounds(tmap, sizeof(struct link_map));
+#endif
if (tmap->l_idx != IDX_STILL_USED)
{
/* Remove the scope. Or replace with own map's
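dl-close.c recovers a struct link_map pointer by subtracting a member offset from a pointer into the middle of the object; after the cast the pointer still carries the narrower bounds of the member it was derived from, so the following field accesses would fault. Re-attaching bounds for the whole structure with __bnd_set_ptr_bounds() makes the accesses legal. A generic sketch of the same container-of pattern, assuming the intrinsic provided by the MPX-enabled compiler; the types are illustrative:

#include <stddef.h>

struct node
{
  int key;
  struct node *next;
};

/* Given a pointer to the NEXT member, recover the enclosing node.  */
static struct node *
node_from_next (struct node **nextp)
{
  struct node *n = (struct node *) ((char *) nextp
                                    - offsetof (struct node, next));
#ifdef __CHKP__
  /* The cast alone keeps the member's bounds; widen to the full object.  */
  n = __bnd_set_ptr_bounds (n, sizeof (struct node));
#endif
  return n;
}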
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fcad0ab9dc3deded2cfd8e7bc467e204d8b55a63
commit fcad0ab9dc3deded2cfd8e7bc467e204d8b55a63
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Tue Dec 18 19:42:52 2012 +0400
Inappropriate code style for Intel MPX in crypt/crypt_util.c. Fixed.
diff --git a/crypt/crypt_util.c b/crypt/crypt_util.c
index 2409079..8b58668 100644
--- a/crypt/crypt_util.c
+++ b/crypt/crypt_util.c
@@ -487,7 +487,7 @@ small_tables_done:
* DES round.
*
*/
-
+#ifndef __CHKP__
if (__data->sb0 + sizeof (__data->sb0) == __data->sb1
&& __data->sb1 + sizeof (__data->sb1) == __data->sb2
&& __data->sb2 + sizeof (__data->sb2) == __data->sb3)
@@ -497,11 +497,14 @@ small_tables_done:
+ (int)sizeof(__data->sb2)
+ (int)sizeof(__data->sb3));
else {
+#endif
_ufc_clearmem(__data->sb0, (int)sizeof(__data->sb0));
_ufc_clearmem(__data->sb1, (int)sizeof(__data->sb1));
_ufc_clearmem(__data->sb2, (int)sizeof(__data->sb2));
_ufc_clearmem(__data->sb3, (int)sizeof(__data->sb3));
+#ifndef __CHKP__
}
+#endif
for(sg = 0; sg < 4; sg++) {
int j1, j2;
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0a6a6f8037cc93a6d165d925d6c029fe42998acc
commit 0a6a6f8037cc93a6d165d925d6c029fe42998acc
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Oct 15 15:01:09 2012 +0400
Inappropriate code style for Intel MPX. Fix missing bounds in sysdeps/generic/unwind-dw2-fde.h.
diff --git a/sysdeps/generic/unwind-dw2-fde.h b/sysdeps/generic/unwind-dw2-fde.h
index fad46bf..7fce24c 100644
--- a/sysdeps/generic/unwind-dw2-fde.h
+++ b/sysdeps/generic/unwind-dw2-fde.h
@@ -147,7 +147,7 @@ typedef struct dwarf_fde fde;
static inline struct dwarf_cie *
get_cie (struct dwarf_fde *f)
{
- return (void *)&f->CIE_delta - f->CIE_delta;
+ return (char *)f + offsetof (struct dwarf_fde, CIE_delta) - f->CIE_delta;
}
static inline fde *
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=01d45f7e37130cb47d0ef788283e99bc07564f34
commit 01d45f7e37130cb47d0ef788283e99bc07564f34
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Dec 14 18:41:37 2012 +0400
Inappropriate code style for Intel MPX in debug/strcpy_chk.c. Use a different version when MPX is enabled.
diff --git a/debug/strcpy_chk.c b/debug/strcpy_chk.c
index 81bf46f..ba6da70 100644
--- a/debug/strcpy_chk.c
+++ b/debug/strcpy_chk.c
@@ -27,6 +27,7 @@ __strcpy_chk (dest, src, destlen)
char *dest;
const char *src;
size_t destlen;
+#ifndef __CHKP__
{
char c;
char *s = (char *) src;
@@ -65,3 +66,45 @@ __strcpy_chk (dest, src, destlen)
return dest;
}
+#else
+{
+ char c;
+ char *s = (char *) src;
+ char *d = (char *) dest;
+
+ while (__builtin_expect (destlen >= 4, 0))
+ {
+ c = s[0];
+ d[0] = c;
+ if (c == '\0')
+ return dest;
+ c = s[1];
+ d[1] = c;
+ if (c == '\0')
+ return dest;
+ c = s[2];
+ d[2] = c;
+ if (c == '\0')
+ return dest;
+ c = s[3];
+ d[3] = c;
+ if (c == '\0')
+ return dest;
+ destlen -= 4;
+ d += 4;
+ s += 4;
+ }
+
+ do
+ {
+ if (__builtin_expect (destlen-- == 0, 0))
+ __chk_fail ();
+ c = *s;
+ *(d++) = c;
+ s++;
+ }
+ while (c != '\0');
+
+ return dest;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=00ae469c06aeed1e7bd988875d241cc5a6339d01
commit 00ae469c06aeed1e7bd988875d241cc5a6339d01
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Nov 23 18:50:27 2012 +0400
If Intel MPX is enabled, always compile elf/dl-init.c with -fno-check-pointers
because this file contains code executing before the runtime library
initialization happens.
diff --git a/elf/Makefile b/elf/Makefile
index 3b58649..4ef80c9 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -49,6 +49,7 @@ all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-iterate-phdr.c = $(uses-callbacks)
+CFLAGS-dl-init.c = -fno-check-pointers
ifeq ($(unwind-find-fde),yes)
routines += unwind-dw2-fde-glibc
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4cd77a6b091db5450ec634eeaeab8e36ea3bb1dd
commit 4cd77a6b091db5450ec634eeaeab8e36ea3bb1dd
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Dec 17 13:44:21 2012 +0400
Add the attribute __bnd_variable_size to make the use of flexible-size arrays Intel MPX compliant.
diff --git a/bits/dirent.h b/bits/dirent.h
index 2117a7c..77cae84 100644
--- a/bits/dirent.h
+++ b/bits/dirent.h
@@ -32,7 +32,7 @@ struct dirent
unsigned char d_namlen; /* Length of the file name. */
/* Only this member is in the POSIX standard. */
- char d_name[1]; /* File name (actually longer). */
+ char d_name[1] __attribute__((bnd_variable_size)); /* File name (actually longer). */
};
#ifdef __USE_LARGEFILE64
@@ -43,7 +43,7 @@ struct dirent64
unsigned char d_type;
unsigned char d_namlen;
- char d_name[1];
+ char d_name[1] __attribute__((bnd_variable_size));
};
#endif
diff --git a/bits/sched.h b/bits/sched.h
index 0c200a9..0a9513a 100644
--- a/bits/sched.h
+++ b/bits/sched.h
@@ -65,7 +65,7 @@ typedef unsigned long int __cpu_mask;
/* Data structure to describe CPU mask. */
typedef struct
{
- __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS];
+ __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS] __attribute__((bnd_variable_size));
} cpu_set_t;
/* Access functions for CPU masks. */
diff --git a/debug/tst-chk1.c b/debug/tst-chk1.c
index 6ca8d9d..9783d3a 100644
--- a/debug/tst-chk1.c
+++ b/debug/tst-chk1.c
@@ -137,8 +137,8 @@ do_test (void)
}
setenv ("LIBC_FATAL_STDERR_", "1", 1);
- struct A { char buf1[9]; char buf2[1]; } a;
- struct wA { wchar_t buf1[9]; wchar_t buf2[1]; } wa;
+ struct A { char buf1[9] __attribute__((bnd_variable_size)); char buf2[1]; } a;
+ struct wA { wchar_t buf1[9] __attribute__((bnd_variable_size)); wchar_t buf2[1]; } wa;
printf ("Test checking routines at fortify level %d\n",
#ifdef __USE_FORTIFY_LEVEL
diff --git a/dlfcn/dlfcn.h b/dlfcn/dlfcn.h
index 1ed47b1..0fab755 100644
--- a/dlfcn/dlfcn.h
+++ b/dlfcn/dlfcn.h
@@ -180,7 +180,7 @@ typedef struct
{
size_t dls_size; /* Size in bytes of the whole buffer. */
unsigned int dls_cnt; /* Number of elements in `dls_serpath'. */
- Dl_serpath dls_serpath[1]; /* Actually longer, dls_cnt elements. */
+ Dl_serpath dls_serpath[1] __attribute__((bnd_variable_size)); /* Actually longer, dls_cnt elements. */
} Dl_serinfo;
#endif /* __USE_GNU */
diff --git a/include/link.h b/include/link.h
index 1682467..ca253eb 100644
--- a/include/link.h
+++ b/include/link.h
@@ -318,7 +318,7 @@ struct link_map
{
uintptr_t cookie;
unsigned int bindflags;
- } l_audit[0];
+ } l_audit[0] __attribute__((bnd_variable_size));
};
diff --git a/inet/netinet/in.h b/inet/netinet/in.h
index 89e3813..12294d0 100644
--- a/inet/netinet/in.h
+++ b/inet/netinet/in.h
@@ -319,7 +319,7 @@ struct ip_msfilter
/* Number of source addresses. */
uint32_t imsf_numsrc;
/* Source addresses. */
- struct in_addr imsf_slist[1];
+ struct in_addr imsf_slist[1] __attribute__((bnd_variable_size));
};
#define IP_MSFILTER_SIZE(numsrc) (sizeof (struct ip_msfilter) \
@@ -340,7 +340,7 @@ struct group_filter
/* Number of source addresses. */
uint32_t gf_numsrc;
/* Source addresses. */
- struct sockaddr_storage gf_slist[1];
+ struct sockaddr_storage gf_slist[1] __attribute__((bnd_variable_size));
};
#define GROUP_FILTER_SIZE(numsrc) (sizeof (struct group_filter) \
diff --git a/inet/protocols/routed.h b/inet/protocols/routed.h
index befd865..457d792 100644
--- a/inet/protocols/routed.h
+++ b/inet/protocols/routed.h
@@ -52,8 +52,8 @@ struct rip {
u_char rip_vers; /* protocol version # */
u_char rip_res1[2]; /* pad to 32-bit boundary */
union {
- struct netinfo ru_nets[1]; /* variable length... */
- char ru_tracefile[1]; /* ditto ... */
+ struct netinfo ru_nets[1] __attribute__((bnd_variable_size)); /* variable length... */
+ char ru_tracefile[1] __attribute__((bnd_variable_size)); /* ditto ... */
} ripun;
#define rip_nets ripun.ru_nets
#define rip_tracefile ripun.ru_tracefile
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index f4aa215..9885a13 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -204,7 +204,7 @@ struct known_translation_t
/* Pointer to the string in question. */
union
{
- char appended[ZERO]; /* used if domain != NULL */
+ char appended[ZERO] __attribute__((bnd_variable_size)); /* used if domain != NULL */
const char *ptr; /* used if domain == NULL */
}
msgid;
@@ -342,7 +342,7 @@ struct block_list
typedef struct transmem_list
{
struct transmem_list *next;
- char data[ZERO];
+ char data[ZERO] __attribute__((bnd_variable_size));
} transmem_block_t;
static struct transmem_list *transmem_list;
#else
diff --git a/intl/gettextP.h b/intl/gettextP.h
index d1ec644..79f0a4c 100644
--- a/intl/gettextP.h
+++ b/intl/gettextP.h
@@ -160,7 +160,7 @@ struct binding
struct binding *next;
char *dirname;
char *codeset;
- char domainname[ZERO];
+ char domainname[ZERO] __attribute__((bnd_variable_size));
};
/* A counter which is incremented each time some previous translations
diff --git a/intl/gmo.h b/intl/gmo.h
index 7b50597..b4c48cc 100644
--- a/intl/gmo.h
+++ b/intl/gmo.h
@@ -137,7 +137,7 @@ struct sysdep_string
nls_uint32 segsize;
/* Reference to system dependent string segment, or ~0 at the end. */
nls_uint32 sysdepref;
- } segments[1];
+ } segments[1] __attribute__((bnd_variable_size));
};
/* Marker for the end of the segments[] array. This has the value 0xFFFFFFFF,
diff --git a/intl/loadinfo.h b/intl/loadinfo.h
index 7563624..8004233 100644
--- a/intl/loadinfo.h
+++ b/intl/loadinfo.h
@@ -58,7 +58,7 @@ struct loaded_l10nfile
const void *data;
struct loaded_l10nfile *next;
- struct loaded_l10nfile *successor[1];
+ struct loaded_l10nfile *successor[1] __attribute__((bnd_variable_size));
};
diff --git a/io/fts.h b/io/fts.h
index 0a070ba..93f94f8 100644
--- a/io/fts.h
+++ b/io/fts.h
@@ -116,7 +116,7 @@ typedef struct _ftsent {
u_short fts_instr; /* fts_set() instructions */
struct stat *fts_statp; /* stat(2) information */
- char fts_name[1]; /* file name */
+ char fts_name[1] __attribute__((bnd_variable_size)); /* file name */
} FTSENT;
__BEGIN_DECLS
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 3142726..8dbb598 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -84,7 +84,7 @@ struct __locale_data
const char *string;
unsigned int word; /* Note endian issues vs 64-bit pointers. */
}
- values __flexarr; /* Items, usually pointers into `filedata'. */
+ values __flexarr __attribute__((bnd_variable_size)); /* Items, usually pointers into `filedata'. */
};
/* We know three kinds of collation sorting rules. */
@@ -185,7 +185,7 @@ extern const union catnamestr_t
#include "categories.def"
#undef DEFINE_CATEGORY
};
- char str[0];
+ char str[0] __attribute__((bnd_variable_size));
} _nl_category_names attribute_hidden;
extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden;
extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden;
diff --git a/misc/search.h b/misc/search.h
index e3b3dfd..63a7768 100644
--- a/misc/search.h
+++ b/misc/search.h
@@ -35,7 +35,7 @@ struct qelem
{
struct qelem *q_forw;
struct qelem *q_back;
- char q_data[1];
+ char q_data[1] __attribute__((bnd_variable_size));
};
# endif
diff --git a/nptl/descr.h b/nptl/descr.h
index 58176ea..a175bb0 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -162,7 +162,7 @@ struct pthread
};
/* This descriptor's link on the `stack_used' or `__stack_user' list. */
- list_t list;
+ list_t list __attribute__((bnd_variable_size));
/* Thread ID - which is also a 'is this thread descriptor (and
therefore stack) used' flag. */
@@ -174,7 +174,10 @@ struct pthread
/* List of robust mutexes the thread is holding. */
#ifdef __PTHREAD_MUTEX_HAVE_PREV
void *robust_prev;
- struct robust_list_head robust_head;
+ struct robust_list_head robust_head __attribute__((bnd_variable_size));
+ /* Sometimes we want to cast the pair {robust_prev (void *), the first
+ * field of struct robust_list_head (void *)} to __pthread_list_t,
+ * a struct consisting of two pointers: __prev and __next. */
/* The list above is strange. It is basically a double linked list
but the pointer to the next/previous element of the list points
@@ -186,7 +189,7 @@ struct pthread
# define ENQUEUE_MUTEX_BOTH(mutex, val) \
do { \
__pthread_list_t *next = (__pthread_list_t *) \
- ((((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_head.list)) & ~1ul) \
+ ((char *)(((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_head.list)) & ~1ul) \
- QUEUE_PTR_ADJUST); \
next->__prev = (void *) &mutex->__data.__list.__next; \
mutex->__data.__list.__next = THREAD_GETMEM (THREAD_SELF, \
diff --git a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
index 28b49bd..0adb200 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
@@ -75,7 +75,7 @@ typedef union pthread_attr_t pthread_attr_t;
typedef struct __pthread_internal_list
{
struct __pthread_internal_list *__prev;
- struct __pthread_internal_list *__next;
+ struct __pthread_internal_list *__next __attribute__((bnd_variable_size));
} __pthread_list_t;
#else
typedef struct __pthread_internal_slist
diff --git a/stdio-common/psiginfo-define.h b/stdio-common/psiginfo-define.h
index e1d1a35..d76cb6b 100644
--- a/stdio-common/psiginfo-define.h
+++ b/stdio-common/psiginfo-define.h
@@ -3,7 +3,7 @@ static const union C(codestrs_t_, NOW) {
#define P(n, s) char MF(__LINE__)[sizeof (s)];
#include "psiginfo-data.h"
};
- char str[0];
+ char str[0] __attribute__((bnd_variable_size));
} C(codestrs_, NOW) = { {
#define P(n, s) s,
#include "psiginfo-data.h"
diff --git a/sunrpc/clnt_udp.c b/sunrpc/clnt_udp.c
index 1b6a20b..eca7122 100644
--- a/sunrpc/clnt_udp.c
+++ b/sunrpc/clnt_udp.c
@@ -96,7 +96,7 @@ struct cu_data
u_int cu_sendsz;
char *cu_outbuf;
u_int cu_recvsz;
- char cu_inbuf[1];
+ char cu_inbuf[1] __attribute__((bnd_variable_size)) ;
};
/*
diff --git a/sysdeps/gnu/netinet/ip_icmp.h b/sysdeps/gnu/netinet/ip_icmp.h
index 136fb47..5c2cb0c 100644
--- a/sysdeps/gnu/netinet/ip_icmp.h
+++ b/sysdeps/gnu/netinet/ip_icmp.h
@@ -189,7 +189,7 @@ struct icmp
} id_ip;
struct icmp_ra_addr id_radv;
u_int32_t id_mask;
- u_int8_t id_data[1];
+ u_int8_t id_data[1] __attribute__((bnd_variable_size));
} icmp_dun;
#define icmp_otime icmp_dun.id_ts.its_otime
#define icmp_rtime icmp_dun.id_ts.its_rtime
diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
index e42dee8..5086380 100644
--- a/sysdeps/unix/sysv/linux/bits/sched.h
+++ b/sysdeps/unix/sysv/linux/bits/sched.h
@@ -124,7 +124,7 @@ typedef unsigned long int __cpu_mask;
/* Data structure to describe CPU mask. */
typedef struct
{
- __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS];
+ __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS] __attribute__((bnd_variable_size));
} cpu_set_t;
/* Access functions for CPU masks. */
diff --git a/sysvipc/sys/msg.h b/sysvipc/sys/msg.h
index a0b38f0..c06424f 100644
--- a/sysvipc/sys/msg.h
+++ b/sysvipc/sys/msg.h
@@ -51,7 +51,7 @@ typedef __ssize_t ssize_t;
struct msgbuf
{
__syscall_slong_t mtype; /* type of received/sent message */
- char mtext[1]; /* text of the message */
+ char mtext[1] __attribute__((bnd_variable_size)); /* text of the message */
};
#endif
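The attribute marks a trailing array whose declared size is nominal (the real size is fixed at allocation time), telling the bounds checker not to narrow pointers to the declared one-element size. A minimal sketch of how such a structure is typically allocated and used; the struct and function names are illustrative, and the attribute spelling is the one accepted by the MPX-enabled compiler (other compilers would merely warn about an unknown attribute):

#include <stdlib.h>
#include <string.h>

struct packet
{
  size_t len;
  /* Actually LEN bytes long; without the attribute, bounds narrowing
     would restrict accesses to data[0] only.  */
  char data[1] __attribute__ ((bnd_variable_size));
};

static struct packet *
packet_new (const char *payload, size_t len)
{
  struct packet *p = malloc (sizeof (struct packet) + len);
  if (p == NULL)
    return NULL;
  p->len = len;
  /* Checked against the whole malloc'd object, not against data[1].  */
  memcpy (p->data, payload, len);
  return p;
}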
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5d92ac866acfc532a4d49af0b1c2b69c260c0ce3
commit 5d92ac866acfc532a4d49af0b1c2b69c260c0ce3
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Jan 21 15:35:12 2013 +0400
Use C code instead of inline assembler in macros of tls.h for i386 (for Intel MPX only).
diff --git a/nptl/sysdeps/i386/tls.h b/nptl/sysdeps/i386/tls.h
index 3d18b1d..bf30088 100644
--- a/nptl/sysdeps/i386/tls.h
+++ b/nptl/sysdeps/i386/tls.h
@@ -259,11 +259,24 @@ union user_desc_init
assignments like
pthread_descr self = thread_self();
do not get optimized away. */
-# define THREAD_SELF \
+
+# ifndef __CHKP__
+# define THREAD_SELF \
({ struct pthread *__self; \
asm ("movl %%gs:%c1,%0" : "=r" (__self) \
: "i" (offsetof (struct pthread, header.self))); \
__self;})
+# else
+# define THREAD_SELF \
+ ({ struct pthread *__self; \
+ asm ("movl %%gs:%c1,%0" : "=r" (__self) \
+ : "i" (offsetof (struct pthread, header.self))); \
+ /* Set first minimum bounds to make possible reading stackblock and stackblock_size. */ \
+ __self = __bnd_set_ptr_bounds(__self, TLS_INIT_TCB_SIZE); \
+ /* Set actual correct bounds. */ \
+ (struct pthread*) __bnd_copy_ptr_bounds(__self, __bnd_set_ptr_bounds(__self->stackblock, \
+ __self->stackblock_size)); })
+# endif
/* Magic for libthread_db to know how to do THREAD_SELF. */
# define DB_THREAD_SELF \
@@ -272,7 +285,8 @@ union user_desc_init
/* Read member of the thread descriptor directly. */
-# define THREAD_GETMEM(descr, member) \
+# ifndef __CHKP__
+# define THREAD_GETMEM(descr, member) \
({ __typeof (descr->member) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%gs:%P2,%b0" \
@@ -296,10 +310,15 @@ union user_desc_init
"i" (offsetof (struct pthread, member) + 4)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM(descr, member) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member; })
+# endif
/* Same as THREAD_GETMEM, but the member offset can be non-constant. */
-# define THREAD_GETMEM_NC(descr, member, idx) \
+# ifndef __CHKP__
+# define THREAD_GETMEM_NC(descr, member, idx) \
({ __typeof (descr->member[0]) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%gs:%P2(%3),%b0" \
@@ -325,10 +344,15 @@ union user_desc_init
"r" (idx)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx]; })
+# endif
/* Same as THREAD_SETMEM, but the member offset can be non-constant. */
-# define THREAD_SETMEM(descr, member, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM(descr, member, value) \
({ if (sizeof (descr->member) == 1) \
asm volatile ("movb %b0,%%gs:%P1" : \
: "iq" (value), \
@@ -350,10 +374,15 @@ union user_desc_init
"i" (offsetof (struct pthread, member)), \
"i" (offsetof (struct pthread, member) + 4)); \
}})
-
+# else
+# define THREAD_SETMEM(descr, member, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member = value; })
+#endif
/* Set member of the thread descriptor directly. */
-# define THREAD_SETMEM_NC(descr, member, idx, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
({ if (sizeof (descr->member[0]) == 1) \
asm volatile ("movb %b0,%%gs:%P1(%2)" : \
: "iq" (value), \
@@ -377,7 +406,11 @@ union user_desc_init
"i" (offsetof (struct pthread, member)), \
"r" (idx)); \
}})
-
+# else
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx] = value; })
+# endif
/* Atomic compare and exchange on TLS, returning old value. */
#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
@@ -417,7 +450,8 @@ union user_desc_init
/* Call the user-provided thread function. */
-#define CALL_THREAD_FCT(descr) \
+#ifndef __CHKP__
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
int __ignore1, __ignore2; \
asm volatile ("pushl %%eax\n\t" \
@@ -430,7 +464,11 @@ union user_desc_init
: "i" (offsetof (struct pthread, start_routine)), \
"i" (offsetof (struct pthread, arg))); \
__res; })
-
+# else
+# define CALL_THREAD_FCT(descr) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->start_routine(__self->arg); })
+# endif
/* Set the stack guard field in TCB head. */
#define THREAD_SET_STACK_GUARD(value) \
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=022ff6731b1e0938048df6904d40e9bf876625b1
commit 022ff6731b1e0938048df6904d40e9bf876625b1
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sat Nov 10 12:22:56 2012 +0400
Use C code instead of inline assembler in macros of tls.h for x86_64 (for Intel MPX only).
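Same idea as the i386 commit above; the subtle part is the bootstrap visible in the THREAD_SELF hunk below: the raw %fs-relative pointer carries no bounds, so the macro first attaches provisional bounds just big enough to read stackblock and stackblock_size, and only then installs the real bounds of the thread's stack block. A reduced sketch of that two-step setup, assuming the __bnd_* checked-pointer intrinsics the patch itself uses; the struct and function names here are placeholders.

#include <stddef.h>
#include <stdlib.h>

struct descr { void *stackblock; size_t stackblock_size; };

#ifdef __CHKP__
static struct descr *
attach_bounds (struct descr *raw)
{
  /* Step 1: provisional bounds, just large enough to read the two members
     used below (the patch uses TLS_INIT_TCB_SIZE here).  */
  struct descr *d = __bnd_set_ptr_bounds (raw, sizeof (struct descr));
  /* Step 2: the real bounds, covering the whole block the descriptor
     lives in.  */
  return __bnd_copy_ptr_bounds (d, __bnd_set_ptr_bounds (d->stackblock,
                                                          d->stackblock_size));
}
#else
static struct descr *
attach_bounds (struct descr *raw)
{
  return raw;
}
#endif

int main (void)
{
  size_t block_size = 1024;
  char *block = malloc (block_size);           /* plays the stack block */
  if (block == NULL)
    return 1;
  struct descr *raw = (struct descr *) block;  /* descriptor sits inside its own
                                                  block, like struct pthread at
                                                  the top of the thread stack */
  raw->stackblock = block;
  raw->stackblock_size = block_size;
  struct descr *d = attach_bounds (raw);
  int ok = d->stackblock_size == block_size;
  free (block);
  return ok ? 0 : 1;
}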
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index 61df1af..d3bf15a 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -89,6 +89,7 @@ typedef struct
#ifndef __ASSEMBLER__
+
/* Get system call information. */
# include <sysdep.h>
@@ -166,10 +167,15 @@ typedef struct
/* Return the address of the dtv for the current thread. */
-# define THREAD_DTV() \
+# ifndef __CHKP__
+# define THREAD_DTV() \
({ struct pthread *__pd; \
THREAD_GETMEM (__pd, header.dtv); })
-
+# else
+# define THREAD_DTV() \
+ ({ struct pthread *__self = THREAD_SELF; \
+ GET_DTV(__self); })
+# endif
/* Return the thread descriptor for the current thread.
@@ -177,18 +183,31 @@ typedef struct
assignments like
pthread_descr self = thread_self();
do not get optimized away. */
-# define THREAD_SELF \
+# ifndef __CHKP__
+# define THREAD_SELF \
({ struct pthread *__self; \
asm ("mov %%fs:%c1,%0" : "=r" (__self) \
: "i" (offsetof (struct pthread, header.self))); \
- __self;})
+ __self; })
+# else
+# define THREAD_SELF \
+ ({ struct pthread *__self; \
+ asm ("mov %%fs:%c1,%0" : "=r" (__self) \
+ : "i" (offsetof (struct pthread, header.self))); \
+ /* Set first minimum bounds to make possible reading stackblock and stackblock_size. */ \
+ __self = __bnd_set_ptr_bounds(__self, TLS_INIT_TCB_SIZE); \
+ /* Set actual correct bounds. */ \
+ (struct pthread*) __bnd_copy_ptr_bounds(__self, __bnd_set_ptr_bounds(__self->stackblock, \
+ __self->stackblock_size)); })
+# endif
/* Magic for libthread_db to know how to do THREAD_SELF. */
# define DB_THREAD_SELF_INCLUDE <sys/reg.h> /* For the FS constant. */
# define DB_THREAD_SELF CONST_THREAD_AREA (64, FS)
/* Read member of the thread descriptor directly. */
-# define THREAD_GETMEM(descr, member) \
+# ifndef __CHKP__
+# define THREAD_GETMEM(descr, member) \
({ __typeof (descr->member) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%fs:%P2,%b0" \
@@ -202,7 +221,7 @@ typedef struct
{ \
if (sizeof (__value) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %%fs:%P1,%q0" \
@@ -210,10 +229,15 @@ typedef struct
: "i" (offsetof (struct pthread, member))); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM(descr, member) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member; })
+# endif
/* Same as THREAD_GETMEM, but the member offset can be non-constant. */
-# define THREAD_GETMEM_NC(descr, member, idx) \
+# ifndef __CHKP__
+# define THREAD_GETMEM_NC(descr, member, idx) \
({ __typeof (descr->member[0]) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%fs:%P2(%q3),%b0" \
@@ -228,7 +252,7 @@ typedef struct
{ \
if (sizeof (__value) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %%fs:%P1(,%q2,8),%q0" \
@@ -237,7 +261,11 @@ typedef struct
"r" (idx)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx]; })
+# endif
/* Loading addresses of objects on x86-64 needs to be treated special
when generating PIC code. */
@@ -249,7 +277,8 @@ typedef struct
/* Same as THREAD_SETMEM, but the member offset can be non-constant. */
-# define THREAD_SETMEM(descr, member, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM(descr, member, value) \
({ if (sizeof (descr->member) == 1) \
asm volatile ("movb %b0,%%fs:%P1" : \
: "iq" (value), \
@@ -262,17 +291,22 @@ typedef struct
{ \
if (sizeof (descr->member) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1" : \
: IMM_MODE ((uint64_t) cast_to_integer (value)), \
"i" (offsetof (struct pthread, member))); \
}})
-
+# else
+# define THREAD_SETMEM(descr, member, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member = value; })
+# endif
/* Set member of the thread descriptor directly. */
-# define THREAD_SETMEM_NC(descr, member, idx, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
({ if (sizeof (descr->member[0]) == 1) \
asm volatile ("movb %b0,%%fs:%P1(%q2)" : \
: "iq" (value), \
@@ -287,7 +321,7 @@ typedef struct
{ \
if (sizeof (descr->member[0]) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1(,%q2,8)" : \
@@ -295,7 +329,11 @@ typedef struct
"i" (offsetof (struct pthread, member[0])), \
"r" (idx)); \
}})
-
+# else
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx] = value; })
+# endif
/* Atomic compare and exchange on TLS, returning old value. */
# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
@@ -333,8 +371,8 @@ typedef struct
/* Not necessary for other sizes in the moment. */ \
abort (); })
-
-# define CALL_THREAD_FCT(descr) \
+# ifndef __CHKP__
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
"callq *%%fs:%P1" \
@@ -344,7 +382,11 @@ typedef struct
: "di", "si", "cx", "dx", "r8", "r9", "r10", "r11", \
"memory", "cc"); \
__res; })
-
+# else
+# define CALL_THREAD_FCT(descr) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->start_routine(__self->arg); })
+# endif
/* Set the stack guard field in TCB head. */
# define THREAD_SET_STACK_GUARD(value) \
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=558bf1c0479495b1b7759bbe58b5f0b455fe7b46
commit 558bf1c0479495b1b7759bbe58b5f0b455fe7b46
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Oct 24 16:00:49 2012 +0400
Intel MPX support for mmap and mremap wrappers of syscalls for x86_32 and x86_64.
Create bounds.
Use C wrapper of syscall instead of assembler wrapper for x86_64.
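The caller-visible effect of these wrappers is that the pointer returned by mmap or mremap carries bounds equal to the requested length, so an instrumented program that walks off the end of a mapping takes a bound violation instead of touching adjacent pages. A hypothetical caller, not part of the patch, to show what that buys:

#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>

int main (void)
{
  size_t len = 4096;
  char *buf = mmap (NULL, len, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (buf == MAP_FAILED)
    return 1;

  memset (buf, 0, len);   /* in bounds, fine with or without MPX */
  /* buf[len] = 0; */     /* with __CHKP__ and the patched wrapper this would
                             raise a bound violation, because the wrapper did
                             __bnd_set_ptr_bounds (p, len) on the result */
  return munmap (buf, len);
}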
diff --git a/sysdeps/unix/sysv/linux/i386/Makefile b/sysdeps/unix/sysv/linux/i386/Makefile
index acc3021..f38f4b2 100644
--- a/sysdeps/unix/sysv/linux/i386/Makefile
+++ b/sysdeps/unix/sysv/linux/i386/Makefile
@@ -2,7 +2,7 @@
default-abi := 32
ifeq ($(subdir),misc)
-sysdep_routines += ioperm iopl vm86 call_pselect6 call_fallocate
+sysdep_routines += ioperm iopl vm86 call_pselect6 call_fallocate mremap
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/i386/mmap.S b/sysdeps/unix/sysv/linux/i386/mmap.S
index 0addf65..035a698 100644
--- a/sysdeps/unix/sysv/linux/i386/mmap.S
+++ b/sysdeps/unix/sysv/linux/i386/mmap.S
@@ -74,6 +74,11 @@ L(skip):
ja SYSCALL_ERROR_LABEL
/* Successful; return the syscall's value. */
+ mov 8(%esp), %ecx
+#ifdef __CHKP__
+ bndmk -1(%eax, %ecx), %bnd0
+#endif
+
ret
PSEUDO_END (__mmap)
diff --git a/sysdeps/unix/sysv/linux/i386/mmap64.S b/sysdeps/unix/sysv/linux/i386/mmap64.S
index 31a0f67..8b44c6e 100644
--- a/sysdeps/unix/sysv/linux/i386/mmap64.S
+++ b/sysdeps/unix/sysv/linux/i386/mmap64.S
@@ -89,6 +89,10 @@ L(do_syscall):
ja SYSCALL_ERROR_LABEL
/* Successful; return the syscall's value. */
+ mov 8(%esp), %ecx
+#ifdef __CHKP__
+ bndmk -1(%eax, %ecx), %bnd0
+#endif
ret
cfi_adjust_cfa_offset (16)
diff --git a/sysdeps/unix/sysv/linux/i386/mremap.c b/sysdeps/unix/sysv/linux/i386/mremap.c
new file mode 100644
index 0000000..ad55d9d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/mremap.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sysdeps/unix/sysv/linux/i386/sysdep.h>
+
+void *
+__mremap (void *old_address, size_t old_size, size_t new_size, int flags, ...)
+{
+ void *p = INLINE_SYSCALL (mremap, 4, old_address, old_size, new_size, flags);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, new_size);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mremap, mremap)
diff --git a/sysdeps/unix/sysv/linux/x86_64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/mmap.c
new file mode 100644
index 0000000..1ee6f96
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/mmap.c
@@ -0,0 +1,52 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sysdeps/unix/sysv/linux/x86_64/sysdep.h>
+
+void *
+__mmap (void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+ void *p = INLINE_SYSCALL (mmap, 6, addr, len, prot, flags, fd, offset);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, len);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mmap, mmap64)
+weak_alias (__mmap, __mmap64)
+weak_alias (__mmap, mmap)
+
+void *
+__mremap (void *old_address, size_t old_size, size_t new_size, int flags, ...)
+{
+ void *p = INLINE_SYSCALL (mremap, 4, old_address, old_size, new_size, flags);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, new_size);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mremap, mremap)
diff --git a/sysdeps/unix/sysv/linux/x86_64/mmap64.c b/sysdeps/unix/sysv/linux/x86_64/mmap64.c
new file mode 100644
index 0000000..e69de29
diff --git a/sysdeps/unix/sysv/linux/x86_64/mremap.c b/sysdeps/unix/sysv/linux/x86_64/mremap.c
new file mode 100644
index 0000000..e69de29
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=152d00ad923c702a8753499421baba027d0beffc
commit 152d00ad923c702a8753499421baba027d0beffc
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Oct 10 19:28:57 2012 +0400
Save/restore bounds in x86_64 and x86_32 version of _dl_runtime_resolve.
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 945708f..5f3acb6 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -31,9 +31,29 @@ _dl_runtime_resolve:
cfi_adjust_cfa_offset (4)
pushl %edx
cfi_adjust_cfa_offset (4)
+#ifndef __CHKP__
movl 16(%esp), %edx # Copy args pushed by PLT in register. Note
movl 12(%esp), %eax # that `fixup' takes its parameters in regs.
+#else
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ bndmov %bnd0, (%esp)
+ bndmov %bnd1, 8(%esp)
+ bndmov %bnd2, 16(%esp)
+ bndmov %bnd3, 24(%esp)
+ movl 48(%esp), %edx # Copy args pushed by PLT in register. Note
+ movl 44(%esp), %eax # that `fixup' takes its parameters in regs.
+ bndldx 44(%esp, %eax), %bnd0
+#endif
call _dl_fixup # Call resolver.
+#ifdef __CHKP__
+ bndmov 24(%esp), %bnd3
+ bndmov 16(%esp), %bnd2
+ bndmov 8(%esp), %bnd1
+ bndmov (%esp), %bnd0
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+#endif
popl %edx # Get register content back.
cfi_adjust_cfa_offset (-4)
movl (%esp), %ecx
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a25e390..e07c192 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -31,8 +31,13 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
+#ifndef __CHKP__
subq $56,%rsp
cfi_adjust_cfa_offset(56)
+#else
+ subq $120,%rsp
+ cfi_adjust_cfa_offset(120)
+#endif
movq %rax,(%rsp) # Preserve registers otherwise clobbered.
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
@@ -40,10 +45,26 @@ _dl_runtime_resolve:
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
+#ifndef __CHKP__
movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+#else
+ bndmov %bnd0, 56(%rsp)
+ bndmov %bnd1, 72(%rsp)
+ bndmov %bnd2, 88(%rsp)
+ bndmov %bnd3, 104(%rsp)
+ movq 128(%rsp), %rsi # Copy args pushed by PLT in register.
+ movq 120(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+ bndldx 120(%rsp, %rdi), %bnd0
+#endif
call _dl_fixup # Call resolver.
movq %rax, %r11 # Save return value
+#ifdef __CHKP__
+ bndmov 104(%rsp), %bnd3
+ bndmov 88(%rsp), %bnd2
+ bndmov 72(%rsp), %bnd1
+ bndmov 56(%rsp), %bnd0
+#endif
movq 48(%rsp), %r9 # Get register content back.
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
@@ -51,8 +72,13 @@ _dl_runtime_resolve:
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
+#ifndef __CHKP__
addq $72, %rsp # Adjust stack(PLT did 2 pushes)
cfi_adjust_cfa_offset(-72)
+#else
+ addq $136, %rsp # Adjust stack(PLT did 2 pushes)
+ cfi_adjust_cfa_offset(-136)
+#endif
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3e938a73037cfad920442148c39208d321248485
commit 3e938a73037cfad920442148c39208d321248485
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Jul 23 19:39:27 2012 +0400
Add Intel MPX support to malloc allocator.
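The allocator changes below fall into two groups: the internal chunk helpers (chunk2mem, mem2chunk, next_chunk and friends) become functions that attach bounds to the chunk pointers they pass around, and the public entry points narrow the returned pointer to the caller-requested size with __bnd_narrow_ptr_bounds. A hypothetical caller, not glibc code, to show the effect of that narrowing:

#include <stdlib.h>
#include <string.h>

int main (void)
{
  size_t bytes = 24;
  char *p = malloc (bytes);   /* with the patch, the returned pointer's bounds
                                 are [p, p + 24) even though the underlying
                                 chunk is larger */
  if (p == NULL)
    return 1;

  memset (p, 'x', bytes);     /* in bounds */
  /* p[bytes] = 'x'; */       /* under __CHKP__ this faults instead of silently
                                 scribbling past the requested size */
  free (p);
  return 0;
}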
diff --git a/malloc/arena.c b/malloc/arena.c
index 12a48ad..2aaf1b8 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -131,9 +131,15 @@ int __malloc_initialized = -1;
#endif
/* find the heap and corresponding arena for a given ptr */
-
-#define heap_for_ptr(ptr) \
+#ifndef __CHKP__
+# define heap_for_ptr(ptr) \
((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1)))
+#else
+static heap_info *heap_for_ptr (void *ptr) {
+ heap_info *t = (heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1));
+ return __bnd_set_ptr_bounds(t, sizeof(heap_info));
+}
+#endif
#define arena_for_chunk(ptr) \
(chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena)
diff --git a/malloc/hooks.c b/malloc/hooks.c
index 8c25846..c5c682f 100644
--- a/malloc/hooks.c
+++ b/malloc/hooks.c
@@ -171,6 +171,9 @@ mem2chunk_check(void* mem, unsigned char **magic_p)
next_chunk(prev_chunk(p))!=p) ))
return NULL;
magic = MAGICBYTE(p);
+#ifdef __CHKP__
+ p = (mchunkptr) __bnd_set_ptr_bounds(p, sz + SIZE_SZ);
+#endif
for(sz += SIZE_SZ-1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) {
if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL;
}
diff --git a/malloc/malloc.c b/malloc/malloc.c
index dd295f5..e8fe610 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -218,7 +218,6 @@
#include <malloc-machine.h>
#include <malloc-sysdep.h>
-
#include <atomic.h>
#include <_itoa.h>
#include <bits/wordsize.h>
@@ -1222,11 +1221,12 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/*
---------- Size and alignment checks and conversions ----------
*/
-
+#ifndef __CHKP__
/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+# define chunk2mem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
+# define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+#endif
/* The smallest possible chunk */
#define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
@@ -1239,12 +1239,11 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* Check if m has acceptable alignment */
#define aligned_OK(m) (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+#define aligned_chunk_OK(p) (((unsigned long)((char *)(p) + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK) == 0)
#define misaligned_chunk(p) \
- ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+ ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : ((char *)(p) + 2 * SIZE_SZ)) \
& MALLOC_ALIGN_MASK)
-
-
/*
Check if a request is so large that it would wrap around zero when
padded and aligned. To simplify some other code, the bound is made
@@ -1312,49 +1311,116 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* Get size, ignoring use bits */
#define chunksize(p) ((p)->size & ~(SIZE_BITS))
+#ifdef __CHKP__
+static void* chunk2mem (void *p) {
+ void *t = p + 2 * SIZE_SZ;
+ if (chunk_is_mmapped((mchunkptr)p))
+ return (void *) __bnd_narrow_ptr_bounds (t, t, chunksize((mchunkptr)p) - 2 * SIZE_SZ);
+ /* prev_size field of the next chunk can be used */
+ return (void *) __bnd_set_ptr_bounds(t, chunksize((mchunkptr)p) - SIZE_SZ);
+}
+
+static mchunkptr mem2chunk(void *mem) {
+ mchunkptr temp = (mchunkptr)((char *)(mem) - 2 * SIZE_SZ);
+ temp = __bnd_set_ptr_bounds (temp, sizeof (struct malloc_chunk));
+ return __bnd_set_ptr_bounds (temp, chunksize(temp) > sizeof(struct malloc_chunk) ?
+ chunksize(temp) : sizeof(struct malloc_chunk));
+}
+
+static mchunkptr next_chunk (mchunkptr p) {
+ mchunkptr temp = (mchunkptr)((char*) p + chunksize(p));
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
-/* Ptr to next physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
+static mchunkptr prev_chunk (mchunkptr p) {
+ mchunkptr temp = (mchunkptr)((char*) p - p->prev_size);
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
+
+static mchunkptr chunk_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ mchunkptr temp = (mchunkptr)((char*) p + s);
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
+
+static int inuse (mchunkptr p) {
+ return next_chunk(p)->size & PREV_INUSE;
+}
+
+static int inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ return chunk_at_offset(p, s)->size & PREV_INUSE;
+}
+
+static void set_inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->size |= PREV_INUSE;
+}
+
+static void clear_inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->size &= ~(PREV_INUSE);
+}
+
+/* Set size at head, without disturbing its use bit */
+# define set_head_size(p, s) \
+{ \
+ (p) = (__typeof(p)) __bnd_set_ptr_bounds ((void *) (p), (s) > sizeof (__typeof(p)) ? \
+ (s) : sizeof(__typeof(p))); \
+ (p)->size = ((p)->size & SIZE_BITS) | (s); \
+}
+
+/* Set size/use field */
+# define set_head(p, s) \
+({ \
+ (p) = (__typeof(p)) __bnd_set_ptr_bounds ((void *) (p), \
+ ((s) & ~(SIZE_BITS)) > sizeof (__typeof(p)) ? \
+ ((s) & ~(SIZE_BITS)): sizeof (__typeof(p))); \
+ (p)->size = (s); \
+})
+
+/* Set size at footer (only when chunk is not in use) */
+static void set_foot (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->prev_size = s;
+}
+#else
+/* Ptr to next physical malloc_chunk. */
+# define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
/* Ptr to previous physical malloc_chunk */
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
+# define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
/* Treat space at ptr + offset as a chunk */
-#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+# define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
/* extract p's inuse bit */
-#define inuse(p)\
+# define inuse(p)\
((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
/* set/clear chunk as being inuse without otherwise disturbing */
-#define set_inuse(p)\
+# define set_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
-#define clear_inuse(p)\
+# define clear_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
/* check/set/clear inuse bits in known places */
-#define inuse_bit_at_offset(p, s)\
+# define inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
-#define set_inuse_bit_at_offset(p, s)\
+# define set_inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
-#define clear_inuse_bit_at_offset(p, s)\
+# define clear_inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
/* Set size at head, without disturbing its use bit */
-#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+# define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s)))
/* Set size/use field */
-#define set_head(p, s) ((p)->size = (s))
+# define set_head(p, s) ((p)->size = (s))
/* Set size at footer (only when chunk is not in use) */
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
-
-
+# define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
+#endif
/*
-------------------- Internal data structures --------------------
@@ -1945,7 +2011,7 @@ static void do_check_chunk(mstate av, mchunkptr p)
/* chunk is page-aligned */
assert(((p->prev_size + sz) & (GLRO(dl_pagesize)-1)) == 0);
/* mem is aligned */
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
}
}
@@ -1968,7 +2034,7 @@ static void do_check_free_chunk(mstate av, mchunkptr p)
if ((unsigned long)(sz) >= MINSIZE)
{
assert((sz & MALLOC_ALIGN_MASK) == 0);
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
/* ... matching footer field */
assert(next->prev_size == sz);
/* ... and is fully consolidated */
@@ -2042,7 +2108,7 @@ static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
assert((sz & MALLOC_ALIGN_MASK) == 0);
assert((unsigned long)(sz) >= MINSIZE);
/* ... and alignment */
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
/* chunk is less than MINSIZE more than request */
assert((long)(sz) - (long)(s) >= 0);
assert((long)(sz) - (long)(s + MINSIZE) < 0);
@@ -2313,16 +2379,16 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page
aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
- assert (((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
+ assert (((INTERNAL_SIZE_T)((void *)mm + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK) == 0);
front_misalign = 0;
}
else
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)mm + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
correction = MALLOC_ALIGNMENT - front_misalign;
p = (mchunkptr)(mm + correction);
- p->prev_size = correction;
set_head(p, (size - correction) |IS_MMAPPED);
+ p->prev_size = correction;
}
else
{
@@ -2349,7 +2415,11 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* Record incoming configuration of top */
old_top = av->top;
- old_size = chunksize(old_top);
+ if (old_top == initial_top(av)) {
+ old_size = 0;
+ } else {
+ old_size = chunksize(old_top);
+ }
old_end = (char*)(chunk_at_offset(old_top, old_size));
brk = snd_brk = (char*)(MORECORE_FAILURE);
@@ -2399,9 +2469,9 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
become the top chunk again later. Note that a footer is set
up, too, although the chunk is marked in use. */
old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
- set_head(chunk_at_offset(old_top, old_size + 2*SIZE_SZ), 0|PREV_INUSE);
+ chunk_at_offset(old_top, old_size + 2*SIZE_SZ)->size = 0|PREV_INUSE;
if (old_size >= MINSIZE) {
- set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE);
+ chunk_at_offset(old_top, old_size)->size = (2*SIZE_SZ)|PREV_INUSE;
set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ));
set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA);
_int_free(av, old_top, 1);
@@ -2545,7 +2615,7 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* Guarantee alignment of first new chunk made from this space */
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
/*
@@ -2599,9 +2669,9 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
else {
if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
/* MORECORE/mmap must correctly align */
- assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0);
+ assert(((unsigned long)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK) == 0);
else {
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
/*
@@ -2676,8 +2746,12 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* finally, do the allocation */
p = av->top;
- size = chunksize(p);
-
+ if (p != initial_top(av)) {
+ size = chunksize(p);
+ }
+ else {
+ size = 0;
+ }
/* check that one of the above allocation paths succeeded */
if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
remainder_size = size - nb;
@@ -2820,11 +2894,10 @@ mremap_chunk(mchunkptr p, size_t new_size)
p = (mchunkptr)(cp + offset);
- assert(aligned_OK(chunk2mem(p)));
-
- assert((p->prev_size == offset));
+ assert(aligned_chunk_OK(p));
set_head(p, (new_size - offset)|IS_MMAPPED);
+ assert((p->prev_size == offset));
mp_.mmapped_mem -= size + offset;
mp_.mmapped_mem += new_size;
if ((unsigned long)mp_.mmapped_mem > (unsigned long)mp_.max_mmapped_mem)
@@ -2863,7 +2936,11 @@ __libc_malloc(size_t bytes)
(void)mutex_unlock(&ar_ptr->mutex);
assert(!victim || chunk_is_mmapped(mem2chunk(victim)) ||
ar_ptr == arena_for_chunk(mem2chunk(victim)));
+#ifndef __CHKP__
return victim;
+#else
+ return __bnd_narrow_ptr_bounds (victim, victim, bytes);
+#endif
}
libc_hidden_def(__libc_malloc)
@@ -2951,7 +3028,12 @@ __libc_realloc(void* oldmem, size_t bytes)
if(newp) return chunk2mem(newp);
#endif
/* Note the extra SIZE_SZ overhead. */
- if(oldsize - SIZE_SZ >= nb) return oldmem; /* do nothing */
+ if(oldsize - SIZE_SZ >= nb)
+#ifndef __CHKP__
+ return oldmem; /* do nothing */
+#else
+ return __bnd_narrow_ptr_bounds(oldmem, oldmem, bytes); /* do nothing */
+#endif
/* Must alloc, copy, free. */
newmem = __libc_malloc(bytes);
if (newmem == 0) return 0; /* propagate failure */
@@ -2993,8 +3075,11 @@ __libc_realloc(void* oldmem, size_t bytes)
_int_free(ar_ptr, oldp, 0);
}
}
-
+#ifndef __CHKP__
return newp;
+#else
+ return __bnd_narrow_ptr_bounds(newp, newp, bytes);
+#endif
}
libc_hidden_def (__libc_realloc)
@@ -3029,7 +3114,11 @@ __libc_memalign(size_t alignment, size_t bytes)
(void)mutex_unlock(&ar_ptr->mutex);
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
/* For ISO C11. */
weak_alias (__libc_memalign, aligned_alloc)
@@ -3065,8 +3154,11 @@ __libc_valloc(size_t bytes)
(void)mutex_unlock (&ar_ptr->mutex);
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
-
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
void*
@@ -3100,7 +3192,11 @@ __libc_pvalloc(size_t bytes)
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
void*
@@ -3132,6 +3228,9 @@ __libc_calloc(size_t n, size_t elem_size)
mem = (*hook)(sz, RETURN_ADDRESS (0));
if(mem == 0)
return 0;
+#ifdef __CHKP__
+ mem = __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
return memset(mem, 0, sz);
}
@@ -3145,7 +3244,12 @@ __libc_calloc(size_t n, size_t elem_size)
need to clear. */
#if MORECORE_CLEARS
oldtop = top(av);
- oldtopsize = chunksize(top(av));
+ if (oldtop == initial_top(av))
+ {
+ oldtopsize = 0;
+ } else {
+ oldtopsize = chunksize(top(av));
+ }
#if MORECORE_CLEARS < 2
/* Only newly allocated memory is guaranteed to be cleared. */
if (av == &main_arena &&
@@ -3179,6 +3283,9 @@ __libc_calloc(size_t n, size_t elem_size)
/* Two optional cases in which clearing not necessary */
if (chunk_is_mmapped (p))
{
+#ifdef __CHKP__
+ mem = __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
if (__builtin_expect (perturb_byte, 0))
MALLOC_ZERO (mem, sz);
return mem;
@@ -3221,8 +3328,11 @@ __libc_calloc(size_t n, size_t elem_size)
}
}
}
-
+#ifndef __CHKP__
return mem;
+#else
+ return __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
}
/*
@@ -3676,7 +3786,11 @@ _int_malloc(mstate av, size_t bytes)
*/
victim = av->top;
- size = chunksize(victim);
+ if (victim == initial_top(av)) {
+ size = 0;
+ } else {
+ size = chunksize(victim);
+ }
if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
remainder_size = size - nb;
@@ -4051,6 +4165,9 @@ static void malloc_consolidate(mstate av)
p = atomic_exchange_acq (fb, 0);
if (p != 0) {
do {
+#ifdef __CHKP__
+ p = __bnd_set_ptr_bounds(p, sizeof (struct malloc_chunk));
+#endif
check_inuse_chunk(av, p);
nextp = p->fd;
@@ -4336,8 +4453,8 @@ _int_memalign(mstate av, size_t alignment, size_t bytes)
/* For mmapped chunks, just adjust offset */
if (chunk_is_mmapped(p)) {
- newp->prev_size = p->prev_size + leadsize;
set_head(newp, newsize|IS_MMAPPED);
+ newp->prev_size = p->prev_size + leadsize;
return chunk2mem(newp);
}
@@ -4350,7 +4467,7 @@ _int_memalign(mstate av, size_t alignment, size_t bytes)
p = newp;
assert (newsize >= nb &&
- (((unsigned long)(chunk2mem(p))) % alignment) == 0);
+ (((unsigned long)((char *)p + 2 * SIZE_SZ) % alignment) == 0));
}
/* Also give back spare room at the end */
@@ -4430,7 +4547,7 @@ static int mtrim(mstate av, size_t pad)
+ sizeof (struct malloc_chunk)
+ psm1) & ~psm1);
- assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+ assert ((char *) (p) + 6 * SIZE_SZ <= paligned_mem);
assert ((char *) p + size > paligned_mem);
/* This is the size we could potentially free. */
@@ -4932,7 +5049,6 @@ __posix_memalign (void **memptr, size_t alignment, size_t size)
}
weak_alias (__posix_memalign, posix_memalign)
-
int
malloc_info (int options, FILE *fp)
{
@@ -5121,7 +5237,6 @@ malloc_info (int options, FILE *fp)
return 0;
}
-
strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
diff --git a/malloc/obstack.h b/malloc/obstack.h
index d2e056b..5e4b5b9 100644
--- a/malloc/obstack.h
+++ b/malloc/obstack.h
@@ -143,7 +143,7 @@ struct _obstack_chunk /* Lives at front of each chunk. */
{
char *limit; /* 1 past end of this chunk */
struct _obstack_chunk *prev; /* address of prior chunk or NULL */
- char contents[4]; /* objects begin here */
+ char contents[4] __attribute__((bnd_variable_size)); /* objects begin here */
};
struct obstack /* control current object in current chunk */
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources