This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch neleai/strlen updated. glibc-2.16-ports-merge-814-gbb2a289
- From: neleai at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 4 Dec 2012 17:15:31 -0000
- Subject: GNU C Library master sources branch neleai/strlen updated. glibc-2.16-ports-merge-814-gbb2a289
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, neleai/strlen has been updated
via bb2a2896dc6a72967cc792043ab691d045587136 (commit)
from d389b47d7a51af5e93bbc5543ab829e326f425fa (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bb2a2896dc6a72967cc792043ab691d045587136
commit bb2a2896dc6a72967cc792043ab691d045587136
Author: Ondrej Bilka <neleai@seznam.cz>
Date: Tue Dec 4 18:58:53 2012 +0100
Testing avx2 implementation.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 6b07afa..299038c 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -10,14 +10,14 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
- strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
+ strncase_l-ssse3 memset-x86-64 \
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
+ strcat-ssse3 strncat-ssse3 \
strrchr-sse2-no-bsf strchr-sse2-no-bsf \
- memcmp-ssse3
+ memcmp-ssse3 strlen-avx2
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4
@@ -30,6 +30,8 @@ CFLAGS-strcasestr-nonascii.c += -msse4
endif
endif
+CPPFLAGS-strlen-avx2.S += -mavx2
+
ifeq ($(subdir),wcsmbs)
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
endif
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
new file mode 100644
index 0000000..ef1da34
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -0,0 +1,73 @@
+/* strlen_avx2(str) -- determine the length of the string STR.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+
+ .text
+/* size_t strlen_avx2 (const char *str)
+
+   In:    %rdi = str.
+   Out:   %rax = number of bytes preceding the first NUL byte of str.
+   Uses:  %rcx, %rdx, %ymm8-%ymm11, flags.
+
+   The string is scanned in 64-byte blocks, starting at str rounded
+   down to a multiple of 64, so every 32-byte load stays inside one
+   64-byte-aligned block.  %ymm8 is kept all-zero; vpcmpeqb against it
+   flags NUL bytes and vpmovmskb condenses the flags to one bit per
+   byte.  vzeroupper is executed before each return so that callers
+   running legacy SSE code do not pay the AVX->SSE transition
+   penalty.  */
+ENTRY(strlen_avx2)
+        movq    %rdi, %rax
+        vpxor   %ymm8, %ymm8, %ymm8     /* ymm8 = 0 (NUL pattern).  */
+        andq    $-64, %rax              /* rax = str & ~63.  */
+        vpcmpeqb (%rax), %ymm8, %ymm9
+        vpcmpeqb 32(%rax), %ymm8, %ymm10
+        vpmovmskb %ymm9, %rcx
+        vpmovmskb %ymm10, %rdx
+        salq    $32, %rdx
+        orq     %rcx, %rdx              /* rdx = NUL mask, bit i = byte i.  */
+        movl    %edi, %ecx
+        /* A 64-bit shift uses only the low 6 bits of %cl, so no explicit
+           "andl $63, %ecx" is needed to obtain str % 64.  */
+        shrq    %cl, %rdx               /* Discard bytes located before str.  */
+        testq   %rdx, %rdx
+        je      L(loop)
+        bsfq    %rdx, %rax              /* Bit 0 now corresponds to str[0].  */
+        vzeroupper
+        ret
+
+        .p2align 4,,10
+        .p2align 3
+L(ret0):
+        addq    $64, %rax               /* rax = block that contains the NUL.  */
+L(ret64):
+        /* Here %rax is the start of the block containing the NUL and
+           %ymm9/%ymm10 hold the compare results for its two halves.  */
+        vpmovmskb %ymm9, %rcx
+        vpmovmskb %ymm10, %rdx
+        salq    $32, %rdx
+        orq     %rcx, %rdx
+        bsfq    %rdx, %rdx              /* rdx = offset of first NUL in block.  */
+        addq    %rdx, %rax
+        subq    %rdi, %rax              /* Length = address of NUL - str.  */
+        vzeroupper
+        ret
+
+        .p2align 4,,10
+        .p2align 3
+L(loop):
+        /* On entry %rax is the start of the most recently examined block,
+           which held no NUL at or after str.  Each iteration examines the
+           next two 64-byte blocks.  */
+        vpcmpeqb 64(%rax), %ymm8, %ymm9
+        vpcmpeqb 96(%rax), %ymm8, %ymm10
+        vpor    %ymm9, %ymm10, %ymm11   /* Non-zero iff either half has a NUL.  */
+        vpmovmskb %ymm11, %edx
+        testl   %edx, %edx
+        jne     L(ret0)                 /* NUL is in the block at rax + 64.  */
+        subq    $-128, %rax             /* rax += 128; -128 fits in an imm8.  */
+        vpcmpeqb (%rax), %ymm8, %ymm9
+        vpcmpeqb 32(%rax), %ymm8, %ymm10
+        vpor    %ymm9, %ymm10, %ymm11
+        vpmovmskb %ymm11, %edx
+        testl   %edx, %edx
+        je      L(loop)
+        jmp     L(ret64)                /* NUL is in the block at rax.  */
+END(strlen_avx2)
+libc_hidden_builtin_def (strlen_avx2)
-----------------------------------------------------------------------
Summary of changes:
sysdeps/x86_64/multiarch/Makefile | 8 ++-
sysdeps/x86_64/multiarch/strlen-avx2.S | 73 ++++++++++++++++++++++++++++++++
2 files changed, 78 insertions(+), 3 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/strlen-avx2.S
hooks/post-receive
--
GNU C Library master sources