This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/wcsrchr/sse2 created. glibc-2.25-374-g7e79614


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch hjl/wcsrchr/sse2 has been created
        at  7e79614553a30a2a70d7039072bcfe58c27eb637 (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7e79614553a30a2a70d7039072bcfe58c27eb637

commit 7e79614553a30a2a70d7039072bcfe58c27eb637
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu May 25 20:58:44 2017 -0700

    Bad wcsrchr.S

diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e6a33bc..a18b40f 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -19,31 +19,50 @@
 
 #include <sysdep.h>
 
+#ifdef USE_AS_WCSRCHR
+# define PCMPEQ	pcmpeqd
+# define PMINU	pminud
+#else
+# define PCMPEQ	pcmpeqb
+# define PMINU	pminub
+#endif
+
 	.text
 ENTRY (strrchr)
 	movd	%esi, %xmm1
 	movq	%rdi, %rax
 	andl	$4095, %eax
+#ifndef USE_AS_WCSRCHR
 	punpcklbw	%xmm1, %xmm1
-	cmpq	$4032, %rax
 	punpcklwd	%xmm1, %xmm1
+#endif
 	pshufd	$0, %xmm1, %xmm1
+	cmpq	$4032, %rax
 	ja	L(cross_page)
 	movdqu	(%rdi), %xmm0
 	pxor	%xmm2, %xmm2
 	movdqa	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm3
+	PCMPEQ	%xmm1, %xmm0
+	PCMPEQ	%xmm2, %xmm3
 	pmovmskb	%xmm0, %ecx
 	pmovmskb	%xmm3, %edx
 	testq	%rdx, %rdx
 	je	L(next_48_bytes)
+#ifdef XUSE_AS_WCSRCHR
+	leaq	-4(%rdx), %rax
+#else
 	leaq	-1(%rdx), %rax
+#endif
 	xorq	%rdx, %rax
 	andq	%rcx, %rax
 	je	L(exit)
 	bsrq	%rax, %rax
 	addq	%rdi, %rax
+#ifdef USE_AS_WCSRCHR
+	subq	$3, %rax
+#endif
+	test	$1, %rax
+	jnz	L(hlt1)
 	ret
 
 	.p2align 4
@@ -51,21 +70,21 @@ L(next_48_bytes):
 	movdqu	16(%rdi), %xmm4
 	movdqa	%xmm4, %xmm5
 	movdqu	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm4
-	pcmpeqb	%xmm2, %xmm5
+	PCMPEQ	%xmm1, %xmm4
+	PCMPEQ	%xmm2, %xmm5
 	movdqu	48(%rdi), %xmm0
 	pmovmskb	%xmm5, %edx
 	movdqa	%xmm3, %xmm5
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm2, %xmm5
-	pcmpeqb	%xmm0, %xmm2
+	PCMPEQ	%xmm1, %xmm3
+	PCMPEQ	%xmm2, %xmm5
+	PCMPEQ	%xmm0, %xmm2
 	salq	$16, %rdx
 	pmovmskb	%xmm3, %r8d
 	pmovmskb	%xmm5, %eax
 	pmovmskb	%xmm2, %esi
 	salq	$32, %r8
 	salq	$32, %rax
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	orq	%rdx, %rax
 	movq	%rsi, %rdx
 	pmovmskb	%xmm4, %esi
@@ -78,12 +97,22 @@ L(next_48_bytes):
 	orq	%rcx, %rsi
 	orq	%rdx, %rax
 	je	L(loop_header2)
+#ifdef XUSE_AS_WCSRCHR
+	leaq	-4(%rax), %rcx
+#else
 	leaq	-1(%rax), %rcx
+#endif
 	xorq	%rax, %rcx
 	andq	%rcx, %rsi
 	je	L(exit)
 	bsrq	%rsi, %rsi
+#ifdef USE_AS_WCSRCHR
+	leaq	-3(%rdi,%rsi), %rax
+#else
 	leaq	(%rdi,%rsi), %rax
+#endif
+	test	$1, %rax
+	jnz	L(hlt2)
 	ret
 
 	.p2align 4
@@ -109,24 +138,24 @@ L(loop_entry):
 	movdqa	48(%rdi), %xmm2
 	movdqa	%xmm3, %xmm0
 	movdqa	16(%rdi), %xmm4
-	pminub	%xmm2, %xmm0
+	PMINU	%xmm2, %xmm0
 	movdqa	(%rdi), %xmm5
-	pminub	%xmm4, %xmm0
-	pminub	%xmm5, %xmm0
-	pcmpeqb	%xmm7, %xmm0
+	PMINU	%xmm4, %xmm0
+	PMINU	%xmm5, %xmm0
+	PCMPEQ	%xmm7, %xmm0
 	pmovmskb	%xmm0, %eax
 	movdqa	%xmm5, %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb	%xmm0, %r9d
 	movdqa	%xmm4, %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb	%xmm0, %edx
 	movdqa	%xmm3, %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	salq	$16, %rdx
 	pmovmskb	%xmm0, %r10d
 	movdqa	%xmm2, %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	salq	$32, %r10
 	orq	%r10, %rdx
 	pmovmskb	%xmm0, %r8d
@@ -135,12 +164,12 @@ L(loop_entry):
 	orq	%r8, %rdx
 	testl	%eax, %eax
 	je	L(loop64)
-	pcmpeqb	%xmm6, %xmm4
-	pcmpeqb	%xmm6, %xmm3
-	pcmpeqb	%xmm6, %xmm5
+	PCMPEQ	%xmm6, %xmm4
+	PCMPEQ	%xmm6, %xmm3
+	PCMPEQ	%xmm6, %xmm5
 	pmovmskb	%xmm4, %eax
 	pmovmskb	%xmm3, %r10d
-	pcmpeqb	%xmm6, %xmm2
+	PCMPEQ	%xmm6, %xmm2
 	pmovmskb	%xmm5, %r9d
 	salq	$32, %r10
 	salq	$16, %rax
@@ -149,18 +178,40 @@ L(loop_entry):
 	orq	%r9, %rax
 	salq	$48, %r8
 	orq	%r8, %rax
+#ifdef USE_AS_WCSRCHR
+	leaq	-4(%rax), %r8
+#else
 	leaq	-1(%rax), %r8
+#endif
 	xorq	%rax, %r8
 	andq	%r8, %rdx
 	cmovne	%rdi, %rcx
 	cmovne	%rdx, %rsi
 	bsrq	%rsi, %rsi
+#ifdef USE_AS_WCSRCHR
+	leaq	-3(%rcx,%rsi), %rax
+#else
 	leaq	(%rcx,%rsi), %rax
+#endif
+	test	$1, %rax
+	jnz	L(hlt3)
 	ret
 
+L(hlt1):
+	hlt
+L(hlt2):
+	hlt
+L(hlt3):
+	hlt
+L(hlt4):
+	hlt
 	.p2align 4
 L(no_c_found):
+#ifdef USE_AS_WCSRCHR
+	movl	$4, %esi
+#else
 	movl	$1, %esi
+#endif
 	xorl	%ecx, %ecx
 	jmp	L(loop_header)
 
@@ -177,26 +228,26 @@ L(cross_page):
 	movdqu	(%rax), %xmm5
 	movdqa	%xmm5, %xmm6
 	movdqu	16(%rax), %xmm4
-	pcmpeqb	%xmm1, %xmm5
-	pcmpeqb	%xmm0, %xmm6
+	PCMPEQ	%xmm1, %xmm5
+	PCMPEQ	%xmm0, %xmm6
 	movdqu	32(%rax), %xmm3
 	pmovmskb	%xmm6, %esi
 	movdqa	%xmm4, %xmm6
 	movdqu	48(%rax), %xmm2
-	pcmpeqb	%xmm1, %xmm4
-	pcmpeqb	%xmm0, %xmm6
+	PCMPEQ	%xmm1, %xmm4
+	PCMPEQ	%xmm0, %xmm6
 	pmovmskb	%xmm6, %edx
 	movdqa	%xmm3, %xmm6
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm0, %xmm6
-	pcmpeqb	%xmm2, %xmm0
+	PCMPEQ	%xmm1, %xmm3
+	PCMPEQ	%xmm0, %xmm6
+	PCMPEQ	%xmm2, %xmm0
 	salq	$16, %rdx
 	pmovmskb	%xmm3, %r9d
 	pmovmskb	%xmm6, %r8d
 	pmovmskb	%xmm0, %ecx
 	salq	$32, %r9
 	salq	$32, %r8
-	pcmpeqb	%xmm1, %xmm2
+	PCMPEQ	%xmm1, %xmm2
 	orq	%r8, %rdx
 	salq	$48, %rcx
 	pmovmskb	%xmm5, %r8d
@@ -215,14 +266,25 @@ L(cross_page):
 	shrq	%cl, %rsi
 	testq	%rdx, %rdx
 	je	L(loop_header2)
+#ifdef XUSE_AS_WCSRCHR
+	leaq	-4(%rdx), %rax
+#else
 	leaq	-1(%rdx), %rax
+#endif
 	xorq	%rdx, %rax
 	andq	%rax, %rsi
 	je	L(exit)
 	bsrq	%rsi, %rax
 	addq	%rdi, %rax
+#ifdef USE_AS_WCSRCHR
+	subq	$3, %rax
+#endif
+	test	$1, %rax
+	jnz	L(hlt4)
 	ret
 END (strrchr)
 
+#ifndef USE_AS_WCSRCHR
 weak_alias (strrchr, rindex)
 libc_hidden_builtin_def (strrchr)
+#endif
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index a6c385c..9bfd754 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -1,282 +1,4 @@
-/* wcsrchr with SSSE3
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
+#define USE_AS_WCSRCHR 1
+#define strrchr wcsrchr
 
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY (wcsrchr)
-
-	movd	%rsi, %xmm1
-	mov	%rdi, %rcx
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	punpckldq %xmm1, %xmm1
-	and	$63, %rcx
-	cmp	$48, %rcx
-	ja	L(crosscache)
-
-	movdqu	(%rdi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %rcx
-	pmovmskb %xmm0, %rax
-	add	$16, %rdi
-
-	test	%rax, %rax
-	jnz	L(unaligned_match1)
-
-	test	%rcx, %rcx
-	jnz	L(return_null)
-
-	and	$-16, %rdi
-	xor	%r8, %r8
-	jmp	L(loop)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%rcx, %rcx
-	jnz	L(prolog_find_zero_1)
-
-	mov	%rax, %r8
-	mov	%rdi, %rsi
-	and	$-16, %rdi
-	jmp	L(loop)
-
-	.p2align 4
-L(crosscache):
-	and	$15, %rcx
-	and	$-16, %rdi
-	pxor	%xmm3, %xmm3
-	movdqa	(%rdi), %xmm0
-	pcmpeqd	%xmm0, %xmm3
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm3, %rdx
-	pmovmskb %xmm0, %rax
-	shr	%cl, %rdx
-	shr	%cl, %rax
-	add	$16, %rdi
-
-	test	%rax, %rax
-	jnz	L(unaligned_match)
-
-	test	%rdx, %rdx
-	jnz	L(return_null)
-
-	xor	%r8, %r8
-	jmp	L(loop)
-
-	.p2align 4
-L(unaligned_match):
-	test	%rdx, %rdx
-	jnz	L(prolog_find_zero)
-
-	mov	%rax, %r8
-	lea	(%rdi, %rcx), %rsi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%rdi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	add	$16, %rdi
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %rcx
-	pmovmskb %xmm0, %rax
-	or	%rax, %rcx
-	jnz	L(matches)
-
-	movdqa	(%rdi), %xmm3
-	pcmpeqd	%xmm3, %xmm2
-	add	$16, %rdi
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm2, %rcx
-	pmovmskb %xmm3, %rax
-	or	%rax, %rcx
-	jnz	L(matches)
-
-	movdqa	(%rdi), %xmm4
-	pcmpeqd	%xmm4, %xmm2
-	add	$16, %rdi
-	pcmpeqd	%xmm1, %xmm4
-	pmovmskb %xmm2, %rcx
-	pmovmskb %xmm4, %rax
-	or	%rax, %rcx
-	jnz	L(matches)
-
-	movdqa	(%rdi), %xmm5
-	pcmpeqd	%xmm5, %xmm2
-	add	$16, %rdi
-	pcmpeqd	%xmm1, %xmm5
-	pmovmskb %xmm2, %rcx
-	pmovmskb %xmm5, %rax
-	or	%rax, %rcx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	test	%rax, %rax
-	jnz	L(match)
-L(return_value):
-	test	%r8, %r8
-	jz	L(return_null)
-	mov	%r8, %rax
-	mov	%rsi, %rdi
-
-	test	$15 << 4, %ah
-	jnz	L(match_fourth_wchar)
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %rcx
-	test	%rcx, %rcx
-	jnz	L(find_zero)
-	mov	%rax, %r8
-	mov	%rdi, %rsi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):
-	test	$15, %cl
-	jnz	L(find_zero_in_first_wchar)
-	test	%cl, %cl
-	jnz	L(find_zero_in_second_wchar)
-	test	$15, %ch
-	jnz	L(find_zero_in_third_wchar)
-
-	and	$1 << 13 - 1, %rax
-	jz	L(return_value)
-
-	test	$15 << 4, %ah
-	jnz	L(match_fourth_wchar)
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(find_zero_in_first_wchar):
-	test	$1, %rax
-	jz	L(return_value)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(find_zero_in_second_wchar):
-	and	$1 << 5 - 1, %rax
-	jz	L(return_value)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(find_zero_in_third_wchar):
-	and	$1 << 9 - 1, %rax
-	jz	L(return_value)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(prolog_find_zero):
-	add	%rcx, %rdi
-	mov     %rdx, %rcx
-L(prolog_find_zero_1):
-	test	$15, %cl
-	jnz	L(prolog_find_zero_in_first_wchar)
-	test	%cl, %cl
-	jnz	L(prolog_find_zero_in_second_wchar)
-	test	$15, %ch
-	jnz	L(prolog_find_zero_in_third_wchar)
-
-	and	$1 << 13 - 1, %rax
-	jz	L(return_null)
-
-	test	$15 << 4, %ah
-	jnz	L(match_fourth_wchar)
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(prolog_find_zero_in_first_wchar):
-	test	$1, %rax
-	jz	L(return_null)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(prolog_find_zero_in_second_wchar):
-	and	$1 << 5 - 1, %rax
-	jz	L(return_null)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(prolog_find_zero_in_third_wchar):
-	and	$1 << 9 - 1, %rax
-	jz	L(return_null)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%rdi), %rax
-	ret
-
-	.p2align 4
-L(match_second_wchar):
-	lea	-12(%rdi), %rax
-	ret
-
-	.p2align 4
-L(match_third_wchar):
-	lea	-8(%rdi), %rax
-	ret
-
-	.p2align 4
-L(match_fourth_wchar):
-	lea	-4(%rdi), %rax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%rax, %rax
-	ret
-
-END (wcsrchr)
+#include "strrchr.S"

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]