This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/wcsrchr/sse2 updated. glibc-2.25-375-g2a96f1f


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/wcsrchr/sse2 has been updated
       via  2a96f1f4456d4efaa1757e664cef4effa716b131 (commit)
      from  9b644f055edef77a3fe02cb582a7e904b539388f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2a96f1f4456d4efaa1757e664cef4effa716b131

commit 2a96f1f4456d4efaa1757e664cef4effa716b131
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri May 26 05:58:46 2017 -0700

    Add __wcsrchr_old_2

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3736f54..7b1d291 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -30,5 +30,6 @@ CFLAGS-strspn-c.c += -msse4
 endif
 
 ifeq ($(subdir),wcsmbs)
-sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
+sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 \
+		   wcscpy-c wcsrchr-old wcsrchr-old-2
 endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 06d9a9d..49dd07b 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -286,6 +286,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/wcsrchr.S.  */
+  IFUNC_IMPL (i, name, wcsrchr,
+	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_old_2)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_old))
+
   /* Support sysdeps/x86_64/multiarch/wcscpy.S.  */
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-old-2.S b/sysdeps/x86_64/multiarch/wcsrchr-old-2.S
new file mode 100644
index 0000000..52a2b6e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-old-2.S
@@ -0,0 +1,265 @@
+/* wcsrchr with SSE2
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.text
+ENTRY (__wcsrchr_old_2)	/* In: %rdi = wide string, %rsi = wchar to find.  Out: %rax = last match or 0.  */
+
+	movd	%rsi, %xmm1	/* Low dword of %xmm1 = search wchar.  */
+	mov	%rdi, %rcx
+	punpckldq %xmm1, %xmm1
+	pxor	%xmm2, %xmm2	/* %xmm2 = 0, the NUL comparand; must be zero on loop entry.  */
+	punpckldq %xmm1, %xmm1	/* Two unpacks broadcast the wchar to all four dwords.  */
+	and	$63, %rcx	/* Offset of the string inside its 64-byte block.  */
+	cmp	$48, %rcx
+	ja	L(crosscache)	/* A 16-byte load from here would straddle the 64-byte block.  */
+
+	movdqu	(%rdi), %xmm0	/* Unaligned load of the first four wchars.  */
+	pcmpeqd	%xmm0, %xmm2	/* %xmm2 = per-dword NUL mask.  */
+	pcmpeqd	%xmm1, %xmm0	/* %xmm0 = per-dword match mask.  */
+	pmovmskb %xmm2, %rcx	/* %rcx = NUL byte mask (4 bits per wchar).  */
+	pmovmskb %xmm0, %rax	/* %rax = match byte mask (4 bits per wchar).  */
+	add	$16, %rdi	/* %rdi now points just past the examined block.  */
+
+	test	%rax, %rax
+	jnz	L(unaligned_match1)
+
+	test	%rcx, %rcx
+	jnz	L(return_null)	/* NUL seen and no match: nothing to return.  */
+
+	and	$-16, %rdi	/* Round down so the loop can use aligned loads.  */
+	xor	%r8, %r8	/* %r8 = 0: no match remembered yet.  */
+	jmp	L(loop)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%rcx, %rcx
+	jnz	L(prolog_find_zero_1)	/* Match and NUL in the same first block.  */
+
+	mov	%rax, %r8	/* Remember the match mask ...  */
+	mov	%rdi, %rsi	/* ... and the end address of the block it belongs to.  */
+	and	$-16, %rdi
+	jmp	L(loop)
+
+	.p2align 4
+L(crosscache):
+	and	$15, %rcx	/* %rcx = misalignment of the string within 16 bytes.  */
+	and	$-16, %rdi	/* Load the enclosing aligned block instead.  */
+	pxor	%xmm3, %xmm3	/* Scratch zero so that %xmm2 stays zero for the loop.  */
+	movdqa	(%rdi), %xmm0
+	pcmpeqd	%xmm0, %xmm3
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm3, %rdx	/* %rdx = NUL byte mask.  */
+	pmovmskb %xmm0, %rax	/* %rax = match byte mask.  */
+	shr	%cl, %rdx	/* Drop mask bits for bytes before the string start.  */
+	shr	%cl, %rax
+	add	$16, %rdi	/* %rdi = next aligned block.  */
+
+	test	%rax, %rax
+	jnz	L(unaligned_match)
+
+	test	%rdx, %rdx
+	jnz	L(return_null)	/* NUL seen and no match.  */
+
+	xor	%r8, %r8	/* No match remembered yet.  */
+	jmp	L(loop)
+
+	.p2align 4
+L(unaligned_match):
+	test	%rdx, %rdx
+	jnz	L(prolog_find_zero)	/* Match and NUL in the same first block.  */
+
+	mov	%rax, %r8	/* Remember the shifted match mask.  */
+	lea	(%rdi, %rcx), %rsi	/* %rsi = string start + 16, the base matching that shifted mask.  */
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%rdi), %xmm0	/* Step 1 of 4: aligned load of four wchars.  */
+	pcmpeqd	%xmm0, %xmm2	/* NUL mask; %xmm2 stays zero while no NUL is found.  */
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm0	/* Match mask.  */
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm0, %rax
+	or	%rax, %rcx	/* Non-zero if this block holds a NUL or a match.  */
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm3	/* Step 2 of 4, same scheme.  */
+	pcmpeqd	%xmm3, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm3
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm3, %rax
+	or	%rax, %rcx
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm4	/* Step 3 of 4, same scheme.  */
+	pcmpeqd	%xmm4, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm4
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm4, %rax
+	or	%rax, %rcx
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm5	/* Step 4 of 4, same scheme.  */
+	pcmpeqd	%xmm5, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm5
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm5, %rax
+	or	%rax, %rcx
+	jz	L(loop)
+
+	.p2align 4
+L(matches):
+	test	%rax, %rax	/* %rax = match mask of the block that stopped the loop.  */
+	jnz	L(match)
+L(return_value):	/* Return the remembered match, or NULL if there is none.  */
+	test	%r8, %r8
+	jz	L(return_null)
+	mov	%r8, %rax
+	mov	%rsi, %rdi
+
+	andl	$0x11111111, %eax	/* Keep the lowest bit of each 4-bit wchar group.  */
+	bsrl	%eax, %eax	/* Highest set bit (0, 4, 8 or 12) = byte offset of the last match.  */
+	lea	-16(%rdi, %rax), %rax	/* %rdi is the block end, so step back 16 bytes.  */
+	ret
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %rcx	/* Reload the NUL mask; %rcx was merged with %rax above.  */
+	test	%rcx, %rcx
+	jnz	L(find_zero)
+	mov	%rax, %r8	/* No NUL yet: remember this match and keep scanning.  */
+	mov	%rdi, %rsi
+	jmp	L(loop)
+
+	.p2align 4
+L(find_zero):	/* Block has both a match and a NUL: locate the first NUL wchar.  */
+	test	$15, %cl	/* Bits 0-3: first wchar.  */
+	jnz	L(find_zero_in_first_wchar)
+	test	%cl, %cl	/* Bits 4-7: second wchar.  */
+	jnz	L(find_zero_in_second_wchar)
+	test	$15, %ch	/* Bits 8-11: third wchar.  */
+	jnz	L(find_zero_in_third_wchar)
+
+	and	$1 << 13 - 1, %rax	/* (1 << 13) - 1 under gas precedence: keep match bits 0-12.  */
+	jz	L(return_value)	/* No match up to the NUL: use the remembered one.  */
+
+	andl	$0x11111111, %eax	/* One bit per wchar, as in L(return_value).  */
+	bsrl	%eax, %eax
+	lea	-16(%rdi, %rax), %rax
+	ret
+
+	.p2align 4
+L(find_zero_in_first_wchar):
+	test	$1, %rax	/* Does the first wchar (the NUL) itself match?  */
+	jz	L(return_value)
+	lea	-16(%rdi), %rax	/* Address of the first wchar of the block.  */
+	ret
+
+	.p2align 4
+L(find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %rax	/* Keep match bits 0-4: first wchar plus the NUL position.  */
+	jz	L(return_value)
+
+	test	$15 << 4, %al	/* Bits 4-7: second wchar matches.  */
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(find_zero_in_third_wchar):
+	and	$1 << 9 - 1, %rax	/* Keep match bits 0-8: first two wchars plus the NUL position.  */
+	jz	L(return_value)
+
+	test	%ah, %ah	/* Bit 8: third wchar matches.  */
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero):	/* First block only: no remembered match exists, so misses return NULL.  */
+	add	%rcx, %rdi	/* %rdi = string start + 16, the base for the shifted masks.  */
+	mov     %rdx, %rcx	/* %rcx = NUL mask, as expected below.  */
+L(prolog_find_zero_1):
+	test	$15, %cl
+	jnz	L(prolog_find_zero_in_first_wchar)
+	test	%cl, %cl
+	jnz	L(prolog_find_zero_in_second_wchar)
+	test	$15, %ch
+	jnz	L(prolog_find_zero_in_third_wchar)
+
+	and	$1 << 13 - 1, %rax	/* Keep match bits 0-12.  */
+	jz	L(return_null)
+
+	andl	$0x11111111, %eax	/* One bit per wchar; bsr then yields the byte offset.  */
+	bsrl	%eax, %eax
+	lea	-16(%rdi, %rax), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_first_wchar):
+	test	$1, %rax
+	jz	L(return_null)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %rax	/* Keep match bits 0-4.  */
+	jz	L(return_null)
+
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_third_wchar):
+	and	$1 << 9 - 1, %rax	/* Keep match bits 0-8.  */
+	jz	L(return_null)
+
+	test	%ah, %ah
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(match_second_wchar):
+	lea	-12(%rdi), %rax	/* Second wchar of the block ending at %rdi.  */
+	ret
+
+	.p2align 4
+L(match_third_wchar):
+	lea	-8(%rdi), %rax	/* Third wchar of the block ending at %rdi.  */
+	ret
+
+	.p2align 4
+L(return_null):
+	xor	%rax, %rax	/* Return 0 (NULL).  */
+	ret
+
+END (__wcsrchr_old_2)
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-old.S b/sysdeps/x86_64/multiarch/wcsrchr-old.S
new file mode 100644
index 0000000..e61fff1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-old.S
@@ -0,0 +1,282 @@
+/* wcsrchr with SSE2
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.text
+ENTRY (__wcsrchr_old)	/* In: %rdi = wide string, %rsi = wchar to find.  Out: %rax = last match or 0.  */
+
+	movd	%rsi, %xmm1	/* Low dword of %xmm1 = search wchar.  */
+	mov	%rdi, %rcx
+	punpckldq %xmm1, %xmm1
+	pxor	%xmm2, %xmm2	/* %xmm2 = 0, the NUL comparand; must be zero on loop entry.  */
+	punpckldq %xmm1, %xmm1	/* Two unpacks broadcast the wchar to all four dwords.  */
+	and	$63, %rcx	/* Offset of the string inside its 64-byte block.  */
+	cmp	$48, %rcx
+	ja	L(crosscache)	/* A 16-byte load from here would straddle the 64-byte block.  */
+
+	movdqu	(%rdi), %xmm0	/* Unaligned load of the first four wchars.  */
+	pcmpeqd	%xmm0, %xmm2	/* %xmm2 = per-dword NUL mask.  */
+	pcmpeqd	%xmm1, %xmm0	/* %xmm0 = per-dword match mask.  */
+	pmovmskb %xmm2, %rcx	/* %rcx = NUL byte mask (4 bits per wchar).  */
+	pmovmskb %xmm0, %rax	/* %rax = match byte mask (4 bits per wchar).  */
+	add	$16, %rdi	/* %rdi now points just past the examined block.  */
+
+	test	%rax, %rax
+	jnz	L(unaligned_match1)
+
+	test	%rcx, %rcx
+	jnz	L(return_null)	/* NUL seen and no match: nothing to return.  */
+
+	and	$-16, %rdi	/* Round down so the loop can use aligned loads.  */
+	xor	%r8, %r8	/* %r8 = 0: no match remembered yet.  */
+	jmp	L(loop)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%rcx, %rcx
+	jnz	L(prolog_find_zero_1)	/* Match and NUL in the same first block.  */
+
+	mov	%rax, %r8	/* Remember the match mask ...  */
+	mov	%rdi, %rsi	/* ... and the end address of the block it belongs to.  */
+	and	$-16, %rdi
+	jmp	L(loop)
+
+	.p2align 4
+L(crosscache):
+	and	$15, %rcx	/* %rcx = misalignment of the string within 16 bytes.  */
+	and	$-16, %rdi	/* Load the enclosing aligned block instead.  */
+	pxor	%xmm3, %xmm3	/* Scratch zero so that %xmm2 stays zero for the loop.  */
+	movdqa	(%rdi), %xmm0
+	pcmpeqd	%xmm0, %xmm3
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm3, %rdx	/* %rdx = NUL byte mask.  */
+	pmovmskb %xmm0, %rax	/* %rax = match byte mask.  */
+	shr	%cl, %rdx	/* Drop mask bits for bytes before the string start.  */
+	shr	%cl, %rax
+	add	$16, %rdi	/* %rdi = next aligned block.  */
+
+	test	%rax, %rax
+	jnz	L(unaligned_match)
+
+	test	%rdx, %rdx
+	jnz	L(return_null)	/* NUL seen and no match.  */
+
+	xor	%r8, %r8	/* No match remembered yet.  */
+	jmp	L(loop)
+
+	.p2align 4
+L(unaligned_match):
+	test	%rdx, %rdx
+	jnz	L(prolog_find_zero)	/* Match and NUL in the same first block.  */
+
+	mov	%rax, %r8	/* Remember the shifted match mask.  */
+	lea	(%rdi, %rcx), %rsi	/* %rsi = string start + 16, the base matching that shifted mask.  */
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%rdi), %xmm0	/* Step 1 of 4: aligned load of four wchars.  */
+	pcmpeqd	%xmm0, %xmm2	/* NUL mask; %xmm2 stays zero while no NUL is found.  */
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm0	/* Match mask.  */
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm0, %rax
+	or	%rax, %rcx	/* Non-zero if this block holds a NUL or a match.  */
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm3	/* Step 2 of 4, same scheme.  */
+	pcmpeqd	%xmm3, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm3
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm3, %rax
+	or	%rax, %rcx
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm4	/* Step 3 of 4, same scheme.  */
+	pcmpeqd	%xmm4, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm4
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm4, %rax
+	or	%rax, %rcx
+	jnz	L(matches)
+
+	movdqa	(%rdi), %xmm5	/* Step 4 of 4, same scheme.  */
+	pcmpeqd	%xmm5, %xmm2
+	add	$16, %rdi
+	pcmpeqd	%xmm1, %xmm5
+	pmovmskb %xmm2, %rcx
+	pmovmskb %xmm5, %rax
+	or	%rax, %rcx
+	jz	L(loop)
+
+	.p2align 4
+L(matches):
+	test	%rax, %rax	/* %rax = match mask of the block that stopped the loop.  */
+	jnz	L(match)
+L(return_value):	/* Return the remembered match, or NULL if there is none.  */
+	test	%r8, %r8
+	jz	L(return_null)
+	mov	%r8, %rax
+	mov	%rsi, %rdi
+
+	test	$15 << 4, %ah	/* Bits 12-15: fourth wchar matched; check highest first.  */
+	jnz	L(match_fourth_wchar)
+	test	%ah, %ah	/* Remaining bits 8-11: third wchar.  */
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al	/* Bits 4-7: second wchar.  */
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax	/* Otherwise the first wchar of the remembered block.  */
+	ret
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %rcx	/* Reload the NUL mask; %rcx was merged with %rax above.  */
+	test	%rcx, %rcx
+	jnz	L(find_zero)
+	mov	%rax, %r8	/* No NUL yet: remember this match and keep scanning.  */
+	mov	%rdi, %rsi
+	jmp	L(loop)
+
+	.p2align 4
+L(find_zero):	/* Block has both a match and a NUL: locate the first NUL wchar.  */
+	test	$15, %cl	/* Bits 0-3: first wchar.  */
+	jnz	L(find_zero_in_first_wchar)
+	test	%cl, %cl	/* Bits 4-7: second wchar.  */
+	jnz	L(find_zero_in_second_wchar)
+	test	$15, %ch	/* Bits 8-11: third wchar.  */
+	jnz	L(find_zero_in_third_wchar)
+
+	and	$1 << 13 - 1, %rax	/* (1 << 13) - 1 under gas precedence: keep match bits 0-12.  */
+	jz	L(return_value)	/* No match up to the NUL: use the remembered one.  */
+
+	test	$15 << 4, %ah	/* Bit 12 survives only if the NUL wchar itself matches.  */
+	jnz	L(match_fourth_wchar)
+	test	%ah, %ah
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(find_zero_in_first_wchar):
+	test	$1, %rax	/* Does the first wchar (the NUL) itself match?  */
+	jz	L(return_value)
+	lea	-16(%rdi), %rax	/* Address of the first wchar of the block.  */
+	ret
+
+	.p2align 4
+L(find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %rax	/* Keep match bits 0-4: first wchar plus the NUL position.  */
+	jz	L(return_value)
+
+	test	$15 << 4, %al	/* Bits 4-7: second wchar matches.  */
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(find_zero_in_third_wchar):
+	and	$1 << 9 - 1, %rax	/* Keep match bits 0-8: first two wchars plus the NUL position.  */
+	jz	L(return_value)
+
+	test	%ah, %ah	/* Bit 8: third wchar matches.  */
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero):	/* First block only: no remembered match exists, so misses return NULL.  */
+	add	%rcx, %rdi	/* %rdi = string start + 16, the base for the shifted masks.  */
+	mov     %rdx, %rcx	/* %rcx = NUL mask, as expected below.  */
+L(prolog_find_zero_1):
+	test	$15, %cl
+	jnz	L(prolog_find_zero_in_first_wchar)
+	test	%cl, %cl
+	jnz	L(prolog_find_zero_in_second_wchar)
+	test	$15, %ch
+	jnz	L(prolog_find_zero_in_third_wchar)
+
+	and	$1 << 13 - 1, %rax	/* Keep match bits 0-12.  */
+	jz	L(return_null)
+
+	test	$15 << 4, %ah	/* Highest surviving match wins, checked from the fourth wchar down.  */
+	jnz	L(match_fourth_wchar)
+	test	%ah, %ah
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_first_wchar):
+	test	$1, %rax
+	jz	L(return_null)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %rax	/* Keep match bits 0-4.  */
+	jz	L(return_null)
+
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(prolog_find_zero_in_third_wchar):
+	and	$1 << 9 - 1, %rax	/* Keep match bits 0-8.  */
+	jz	L(return_null)
+
+	test	%ah, %ah
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%rdi), %rax
+	ret
+
+	.p2align 4
+L(match_second_wchar):
+	lea	-12(%rdi), %rax	/* Second wchar of the block ending at %rdi.  */
+	ret
+
+	.p2align 4
+L(match_third_wchar):
+	lea	-8(%rdi), %rax	/* Third wchar of the block ending at %rdi.  */
+	ret
+
+	.p2align 4
+L(match_fourth_wchar):
+	lea	-4(%rdi), %rax	/* Fourth wchar of the block ending at %rdi.  */
+	ret
+
+	.p2align 4
+L(return_null):
+	xor	%rax, %rax	/* Return 0 (NULL).  */
+	ret
+
+END (__wcsrchr_old)
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index 9bfd754..6b570c9 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -2,3 +2,5 @@
 #define strrchr wcsrchr
 
 #include "strrchr.S"
+
+strong_alias (wcsrchr, __wcsrchr)

-----------------------------------------------------------------------

Summary of changes:
 sysdeps/x86_64/multiarch/Makefile          |    3 +-
 sysdeps/x86_64/multiarch/ifunc-impl-list.c |    6 +
 sysdeps/x86_64/multiarch/wcsrchr-old-2.S   |  265 ++++++++++++++++++++++++++
 sysdeps/x86_64/multiarch/wcsrchr-old.S     |  282 ++++++++++++++++++++++++++++
 sysdeps/x86_64/wcsrchr.S                   |    2 +
 5 files changed, 557 insertions(+), 1 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-old-2.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-old.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]