This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch strlen created. glibc-2.16-ports-merge-813-gd389b47
- From: neleai at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 4 Dec 2012 13:32:47 -0000
- Subject: GNU C Library master sources branch strlen created. glibc-2.16-ports-merge-813-gd389b47
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, strlen, has been created
at d389b47d7a51af5e93bbc5543ab829e326f425fa (commit)
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d389b47d7a51af5e93bbc5543ab829e326f425fa
commit d389b47d7a51af5e93bbc5543ab829e326f425fa
Author: Ondrej Bilka <neleai@seznam.cz>
Date: Tue Dec 4 15:22:21 2012 +0100
Faster strlen on x64.
diff --git a/ChangeLog b/ChangeLog
index 638934b..a1f1366 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2012-11-05 Ondrej Bilka <neleai@seznam.cz>
+
+ * sysdeps/x86_64/strlen.S: Added new implementation.
+ * sysdeps/x86_64/strnlen.S: Use sysdeps/x86_64/strlen.S.
+ * sysdeps/x86_64/rtld-strlen.S: Use sysdeps/x86_64/strlen.S.
+ * sysdeps/x86_64/multiarch/strlen.S: No longer needed.
+ * sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S: No longer needed.
+ * sysdeps/x86_64/multiarch/strnlen.S: No longer needed.
+ * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Delete unused
+ ifuncs.
+ * sysdeps/x86_64/multiarch/Makefile: Updated.
+
+
2012-12-01 Mike Frysinger <vapier@gentoo.org>
* libio/fileops.c (_IO_new_file_close_it): Do not always flush
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index dd6c27d..6b07afa 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -16,7 +16,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
- strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
+ strrchr-sse2-no-bsf strchr-sse2-no-bsf \
memcmp-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 332a60d..55896c3 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -188,9 +188,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
/* Support sysdeps/x86_64/multiarch/strnlen.S. */
- IFUNC_IMPL (i, name, strnlen,
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2_no_bsf)
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
/* Support sysdeps/x86_64/multiarch/strpbrk.S. */
IFUNC_IMPL (i, name, strpbrk,
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
deleted file mode 100644
index f93432e..0000000
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Multiple versions of strlen(str) -- determine the length of the string STR.
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2012 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc and for
- the DSO. In static binaries we need strlen before the initialization
- happened. */
-#if defined SHARED && !defined NOT_IN_libc
- .text
-ENTRY(strlen)
- .type strlen, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strlen_sse2_pminub(%rip), %rax
- testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
- jnz 2f
- leaq __strlen_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strlen_sse42(%rip), %rax
- ret
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
- jz 3f
- leaq __strlen_sse2_no_bsf(%rip), %rax
-3: ret
-END(strlen)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strlen_sse2, @function; \
- .align 16; \
- .globl __strlen_sse2; \
- .hidden __strlen_sse2; \
- __strlen_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strlen_sse2, .-__strlen_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strlen calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strlen; __GI_strlen = __strlen_sse2
-#endif
-
-#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
deleted file mode 100644
index 248328d..0000000
--- a/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNLEN
-#define STRLEN __strnlen_sse2_no_bsf
-#include "strlen-sse2-no-bsf.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen.S b/sysdeps/x86_64/multiarch/strnlen.S
deleted file mode 100644
index 4df05fc..0000000
--- a/sysdeps/x86_64/multiarch/strnlen.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* multiple version of strnlen
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2012 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc. */
-#ifndef NOT_IN_libc
-
- .text
-ENTRY(__strnlen)
- .type __strnlen, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strnlen_sse2(%rip), %rax
- testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
- jz 2f
- leaq __strnlen_sse2_no_bsf(%rip), %rax
-2: ret
-END(__strnlen)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strnlen_sse2, @function; \
- .align 16; \
- .globl __strnlen_sse2; \
- .hidden __strnlen_sse2; \
- __strnlen_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strnlen_sse2, .-__strnlen_sse2
-
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI_strnlen; __GI_strnlen = __strnlen_sse2
-#endif
-
-#include "../strnlen.S"
diff --git a/sysdeps/x86_64/rtld-strlen.S b/sysdeps/x86_64/rtld-strlen.S
index 7293f87..71be00b 100644
--- a/sysdeps/x86_64/rtld-strlen.S
+++ b/sysdeps/x86_64/rtld-strlen.S
@@ -1,138 +1 @@
-/* strlen(str) -- determine the length of the string STR.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
- Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-
- .text
-ENTRY (strlen)
- movq %rdi, %rcx /* Duplicate source pointer. */
- andl $7, %ecx /* mask alignment bits */
- movq %rdi, %rax /* duplicate destination. */
- jz 1f /* aligned => start loop */
-
- neg %ecx /* We need to align to 8 bytes. */
- addl $8,%ecx
- /* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
- je 2f /* yes => return */
- incq %rax /* increment pointer */
- decl %ecx
- jnz 0b
-
-1: movq $0xfefefefefefefeff,%r8 /* Save magic. */
-
- .p2align 4 /* Align loop. */
-4: /* Main Loop is unrolled 4 times. */
- /* First unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Second unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Third unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Fourth unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz 4b /* no NUL found => continue loop */
-
- .p2align 4 /* Align, it's a jump target. */
-3: subq $8,%rax /* correct pointer increment. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0x00ff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- testl $0xff000000, %ecx /* is fourth byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- shrq $32, %rcx /* look at other half. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0xff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-2:
- subq %rdi, %rax /* compute difference to string start */
- ret
-END (strlen)
-libc_hidden_builtin_def (strlen)
+#include "sysdeps/x86_64/strlen.S"
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index f83d857..ce91919 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,7 +1,4 @@
-/* strlen(str) -- determine the length of the string STR.
- Copyright (C) 2009, 2010 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
+/* Copyright (C) 2012 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -17,85 +14,159 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#include <sysdep.h>
-
- .text
+.text
ENTRY(strlen)
- xor %rax, %rax
- mov %edi, %ecx
- and $0x3f, %ecx
- pxor %xmm0, %xmm0
- cmp $0x30, %ecx
- ja L(next)
- movdqu (%rdi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit_less16)
- mov %rdi, %rax
- and $-16, %rax
- jmp L(align16_start)
-L(next):
- mov %rdi, %rax
- and $-16, %rax
- pcmpeqb (%rax), %xmm0
- mov $-1, %esi
- sub %rax, %rcx
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- and %esi, %edx
- jnz L(exit)
-L(align16_start):
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
- .p2align 4
-L(align16_loop):
- pcmpeqb 16(%rax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16)
-
- pcmpeqb 32(%rax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32)
-
- pcmpeqb 48(%rax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48)
-
- pcmpeqb 64(%rax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%rax), %rax
- test %edx, %edx
- jz L(align16_loop)
-L(exit):
- sub %rdi, %rax
-L(exit_less16):
- bsf %rdx, %rdx
- add %rdx, %rax
- ret
- .p2align 4
-L(exit16):
- sub %rdi, %rax
- bsf %rdx, %rdx
- lea 16(%rdx,%rax), %rax
- ret
- .p2align 4
-L(exit32):
- sub %rdi, %rax
- bsf %rdx, %rdx
- lea 32(%rdx,%rax), %rax
- ret
- .p2align 4
-L(exit48):
- sub %rdi, %rax
- bsf %rdx, %rdx
- lea 48(%rdx,%rax), %rax
- ret
+
+#ifdef AS_STRCAT
+#define RETURN jmp .cpy_str
+#elif defined(AS_STRNCAT)
+#define RETURN jmp .cpy_str
+mov %rdx,%r9
+#else
+#define RETURN ret
+#endif
+
+#ifdef AS_STRNLEN
+test %rsi,%rsi
+jne .l1
+xor %rax,%rax
+RETURN
+.l1:
+
+#define STRNLEN_PROLOG(lab)\
+ mov %rsi,%r8;\
+ andq $-64,%rax;\
+ addq %rdi,%rsi;\
+ subq %rax,%rsi;\
+ addq %rcx,%r8;\
+ testq %r9,%r8;\
+ jne .##lab ;\
+ bts %r8,%rdx;\
+ .##lab##:
+#else
+#define STRNLEN_PROLOG(lab) andq $-64,%rax
+#endif
+
+#define FIND_ZERO \
+ pcmpeqb (%rax),%xmm8 ;\
+ pcmpeqb 16(%rax),%xmm9;\
+ pmovmskb %xmm8, %r8d;\
+ pcmpeqb 32(%rax),%xmm10;\
+ pmovmskb %xmm9, %edx;\
+ pcmpeqb 48(%rax),%xmm11;\
+ salq $16, %rdx;\
+ pmovmskb %xmm10, %r10d;\
+ pmovmskb %xmm11, %ecx;\
+ salq $16, %rcx;\
+ orq %r8, %rdx;\
+ orq %r10, %rcx;\
+ salq $32, %rcx;\
+ orq %rcx, %rdx;
+
+
+#define PROLOG(lab) \
+ FIND_ZERO;\
+ movq %rdi, %rcx;\
+ xorq %rax,%rcx;\
+ STRNLEN_PROLOG(lab);\
+ sarq %cl,%rdx;\
+ test %rdx, %rdx;\
+
+
+ # strlen spends 99% of its time on the first 80 characters, so that is the case we optimize.
+ movq %rdi, %rax
+ pxor %xmm8,%xmm8
+ movq %rdi, %r8
+ pxor %xmm9,%xmm9
+ andq $4095, %r8
+ pxor %xmm10,%xmm10
+ pxor %xmm11,%xmm11
+#ifdef AS_STRNLEN
+ movq $-64, %r9
+#endif
+ cmpq $4032, %r8
+#ifndef LINE_ALIGNED_START
+ # We cannot unify this branch with the next one, as the unified version is ~6 cycles slower.
+ ja .next
+ andq $-16,%rax
+ PROLOG(fall1)
+ je .L16
+ bsfq %rdx, %rax
+ RETURN
+ .next:
+#endif
+
+ andq $-64,%rax
+ PROLOG(fall2)
+ pxor %xmm11,%xmm11
+ je .L16
+ bsfq %rdx, %rax
+ RETURN
+
+ #.p2align 4,,10
+ #.p2align 3
+.L19:
+ addq $64, %rax
+.L17:
+ pxor %xmm8,%xmm8
+ pxor %xmm9,%xmm9
+ pxor %xmm10,%xmm10
+ pxor %xmm11,%xmm11
+#ifdef AS_STRNLEN
+ testq %rsi,%rsi
+ jne .fall3
+ subq %rdi,%rax
+ RETURN
+ .fall3:
+ FIND_ZERO
+ testq %r9,%rsi
+ jne .fall4
+ bts %rsi,%rdx
+ .fall4:
+#else
+ FIND_ZERO
+#endif
+
+ bsfq %rdx, %rdx
+ addq %rdx, %rax
+ subq %rdi, %rax
+ RETURN
+
+ #.p2align 4,,10
+ #.p2align 3
+.L16:
+ #ifdef AS_STRNLEN
+ addq %r9,%rsi
+ testq %r9,%rsi
+ je .L19
+ #endif
+ #prefetcht0 576(%rax)
+ movdqa 64(%rax), %xmm8
+ pminub 80(%rax), %xmm8
+ pminub 96(%rax), %xmm8
+ pminub 112(%rax), %xmm8
+ pcmpeqb %xmm11, %xmm8
+ pmovmskb %xmm8, %edx
+ testl %edx, %edx
+ jne .L19
+ subq $-128, %rax
+ #ifdef AS_STRNLEN
+ addq %r9,%rsi
+ testq %r9,%rsi
+ je .L17
+ #endif
+ #prefetcht0 512(%rax)
+ movdqa (%rax), %xmm8
+ pminub 16(%rax), %xmm8
+ pminub 32(%rax), %xmm8
+ pminub 48(%rax), %xmm8
+ pcmpeqb %xmm11, %xmm8
+ pmovmskb %xmm8, %edx
+ testl %edx, %edx
+ je .L16
+ jmp .L17
END(strlen)
libc_hidden_builtin_def (strlen)
+weak_alias(strlen,__strlen_sse2)
diff --git a/sysdeps/x86_64/strnlen.S b/sysdeps/x86_64/strnlen.S
index 7b38bf4..dc393a4 100644
--- a/sysdeps/x86_64/strnlen.S
+++ b/sysdeps/x86_64/strnlen.S
@@ -1,63 +1,6 @@
-/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN.
- Copyright (C) 2010 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-
- .text
-ENTRY(__strnlen)
- movq %rsi, %rax
- testq %rsi, %rsi
- jz 3f
- pxor %xmm2, %xmm2
- movq %rdi, %rcx
- movq %rdi, %r8
- movq $16, %r9
- andq $~15, %rdi
- movdqa %xmm2, %xmm1
- pcmpeqb (%rdi), %xmm2
- orl $0xffffffff, %r10d
- subq %rdi, %rcx
- shll %cl, %r10d
- subq %rcx, %r9
- pmovmskb %xmm2, %edx
- andl %r10d, %edx
- jnz 1f
- subq %r9, %rsi
- jbe 3f
-
-2: movdqa 16(%rdi), %xmm0
- leaq 16(%rdi), %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- testl %edx, %edx
- jnz 1f
- subq $16, %rsi
- jnbe 2b
-3: ret
-
-1: subq %r8, %rdi
- bsfl %edx, %edx
- addq %rdi, %rdx
- cmpq %rdx, %rax
- cmovnbq %rdx, %rax
- ret
-END(__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_def (strnlen)
+#define AS_STRNLEN
+#define strlen __strnlen
+#define __strlen_sse2 __strnlen_sse2
+#include "sysdeps/x86_64/strlen.S"
+weak_alias(__strnlen,strnlen);
+weak_alias(__strnlen,__GI_strnlen);
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources