This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.25-437-g8fe5736


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  8fe57365bfb5a417d911ab715a5671b3b1d7b155 (commit)
      from  dc485ceb2ac596d27294cc1942adf3181f15e8bf (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8fe57365bfb5a417d911ab715a5671b3b1d7b155

commit 8fe57365bfb5a417d911ab715a5671b3b1d7b155
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Jun 9 05:42:16 2017 -0700

    x86-64: Optimize strchr/strchrnul/wcschr with AVX2
    
    Optimize strchr/strchrnul/wcschr with AVX2 to search 32 bytes with vector
    instructions.  It is as fast as SSE2 versions for size <= 16 bytes and up
    to 1X faster for size > 16 bytes on Haswell.  Select AVX2 version on
    AVX2 machines where vzeroupper is preferred and AVX unaligned load is fast.
    
    NB: It uses TZCNT instead of BSF since TZCNT produces the same result
    as BSF for non-zero input.  TZCNT is faster than BSF and is executed
    as BSF if the machine doesn't support TZCNT.
    
    	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
    	strchr-sse2, strchrnul-sse2, strchr-avx2, strchrnul-avx2,
    	wcschr-sse2 and wcschr-avx2.
    	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
    	(__libc_ifunc_impl_list): Add tests for __strchr_avx2,
    	__strchrnul_avx2, __strchrnul_sse2, __wcschr_avx2 and
    	__wcschr_sse2.
    	* sysdeps/x86_64/multiarch/strchr-avx2.S: New file.
    	* sysdeps/x86_64/multiarch/strchr-sse2.S: Likewise.
    	* sysdeps/x86_64/multiarch/strchr.c: Likewise.
    	* sysdeps/x86_64/multiarch/strchrnul-avx2.S: Likewise.
    	* sysdeps/x86_64/multiarch/strchrnul-sse2.S: Likewise.
    	* sysdeps/x86_64/multiarch/strchrnul.c: Likewise.
    	* sysdeps/x86_64/multiarch/wcschr-avx2.S: Likewise.
    	* sysdeps/x86_64/multiarch/wcschr-sse2.S: Likewise.
    	* sysdeps/x86_64/multiarch/wcschr.c: Likewise.
    	* sysdeps/x86_64/multiarch/strchr.S: Removed.

diff --git a/ChangeLog b/ChangeLog
index fc3dff0..e8eb9e7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,26 @@
 2017-06-09  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	strchr-sse2, strchrnul-sse2, strchr-avx2, strchrnul-avx2,
+	wcschr-sse2 and wcschr-avx2.
+	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
+	(__libc_ifunc_impl_list): Add tests for __strchr_avx2,
+	__strchrnul_avx2, __strchrnul_sse2, __wcschr_avx2 and
+	__wcschr_sse2.
+	* sysdeps/x86_64/multiarch/strchr-avx2.S: New file.
+	* sysdeps/x86_64/multiarch/strchr-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchr.c: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul-avx2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/strchrnul.c: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr-avx2.S: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr-sse2.S: Likewise.
+	* sysdeps/x86_64/multiarch/wcschr.c: Likewise.
+	* sysdeps/x86_64/multiarch/strchr.S: Removed.
+
+2017-06-09  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
 	strlen-sse2, strnlen-sse2, strlen-avx2, strnlen-avx2,
 	wcslen-sse2, wcslen-avx2 and wcsnlen-avx2.
 	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 1846ae1..4523f51 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -13,6 +13,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   memcpy-ssse3-back \
 		   memmove-ssse3-back \
 		   memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
+		   strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
 		   strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
 		   strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
@@ -37,6 +38,7 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
 		   wmemcmp-avx2-movbe \
 		   wmemchr-sse2 wmemchr-avx2 \
 		   wcscpy-ssse3 wcscpy-c \
+		   wcschr-sse2 wcschr-avx2 \
 		   wcsnlen-sse4_1 wcsnlen-c \
 		   wcslen-sse2 wcslen-avx2 wcsnlen-avx2
 endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 9d499ff..8dda1b0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -229,11 +229,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strchr.S.  */
+  /* Support sysdeps/x86_64/multiarch/strchr.c.  */
   IFUNC_IMPL (i, name, strchr,
+	      IFUNC_IMPL_ADD (array, i, strchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/strchrnul.c.  */
+  IFUNC_IMPL (i, name, strchrnul,
+	      IFUNC_IMPL_ADD (array, i, strchrnul,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchrnul_avx2)
+	      IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
   /* Support sysdeps/x86_64/multiarch/strcmp.S.  */
   IFUNC_IMPL (i, name, strcmp,
 	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
@@ -318,6 +328,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
+  IFUNC_IMPL (i, name, wcschr,
+	      IFUNC_IMPL_ADD (array, i, wcschr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcschr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
   /* Support sysdeps/x86_64/multiarch/wcscpy.S.  */
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
new file mode 100644
index 0000000..e4292d3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
@@ -0,0 +1,254 @@
+/* strchr/strchrnul optimized with AVX2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCHR
+#  define STRCHR	__strchr_avx2
+# endif
+
+# ifdef USE_AS_WCSCHR
+#  define VPBROADCAST	vpbroadcastd
+#  define VPCMPEQ	vpcmpeqd
+#  define CHAR_REG	esi
+# else
+#  define VPBROADCAST	vpbroadcastb
+#  define VPCMPEQ	vpcmpeqb
+#  define CHAR_REG	sil
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRCHR)
+	movl	%edi, %ecx
+	/* Broadcast CHAR to YMM0.  */
+	vmovd	%esi, %xmm0
+	vpxor	%xmm9, %xmm9, %xmm9
+	VPBROADCAST %xmm0, %ymm0
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  Search for both CHAR and the
+	   null byte.  */
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	/* Align data for aligned loads in the loop.  */
+	addq	$VEC_SIZE, %rdi
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+
+	jmp	L(more_4x_vec)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	/* Found CHAR or the null byte.  */
+	tzcntl	%eax, %eax
+	addq	%rcx, %rax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(aligned_more):
+	addq	$VEC_SIZE, %rdi
+
+L(more_4x_vec):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	vmovdqa	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	vmovdqa	VEC_SIZE(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	/* Align data to 4 * VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andl	$(4 * VEC_SIZE - 1), %ecx
+	andq	$-(4 * VEC_SIZE), %rdi
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	vmovdqa	(%rdi), %ymm5
+	vmovdqa	VEC_SIZE(%rdi), %ymm6
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm7
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+
+	VPCMPEQ %ymm5, %ymm0, %ymm1
+	VPCMPEQ %ymm6, %ymm0, %ymm2
+	VPCMPEQ %ymm7, %ymm0, %ymm3
+	VPCMPEQ %ymm8, %ymm0, %ymm4
+
+	VPCMPEQ %ymm5, %ymm9, %ymm5
+	VPCMPEQ %ymm6, %ymm9, %ymm6
+	VPCMPEQ %ymm7, %ymm9, %ymm7
+	VPCMPEQ %ymm8, %ymm9, %ymm8
+
+	vpor	%ymm1, %ymm5, %ymm1
+	vpor	%ymm2, %ymm6, %ymm2
+	vpor	%ymm3, %ymm7, %ymm3
+	vpor	%ymm4, %ymm8, %ymm4
+
+	vpor	%ymm1, %ymm2, %ymm5
+	vpor	%ymm3, %ymm4, %ymm6
+
+	vpor	%ymm5, %ymm6, %ymm5
+
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jnz	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	jmp	L(loop_4x_vec)
+
+	.p2align 4
+L(first_vec_x0):
+	/* Found CHAR or the null byte.  */
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$VEC_SIZE, %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	VEC_SIZE(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 2), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 3), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 3)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+END (STRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S
new file mode 100644
index 0000000..c4d1c0c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-sse2.S
@@ -0,0 +1,28 @@
+/* strchr optimized with SSE2.
+   Copyright (C) 2009-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define strchr __strchr_sse2
+
+# undef weak_alias
+# define weak_alias(strchr, index)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strchr)
+#endif
+
+#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
deleted file mode 100644
index c9f54ca..0000000
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
-   Copyright (C) 2009-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(strchr)
-	.type	strchr, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strchr_sse2(%rip), %rax
-2:	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f
-	leaq    __strchr_sse2_no_bsf(%rip), %rax
-3:	ret
-END(strchr)
-
-
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strchr_sse2, @function; \
-	.align 16; \
-	.globl __strchr_sse2; \
-	.hidden __strchr_sse2; \
-	__strchr_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strchr; __GI_strchr = __strchr_sse2
-#endif
-
-#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
new file mode 100644
index 0000000..22af16e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr.c
@@ -0,0 +1,55 @@
+/* Multiple versions of strchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2009-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
+    return OPTIMIZE (sse2_no_bsf);
+
+  return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias (strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
new file mode 100644
index 0000000..fa0cc09
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __strchrnul_avx2
+#define USE_AS_STRCHRNUL 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
new file mode 100644
index 0000000..4d199b3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
@@ -0,0 +1,26 @@
+/* strchrnul optimized with SSE2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __strchrnul __strchrnul_sse2
+
+# undef weak_alias
+# define weak_alias(__strchrnul, strchrnul)
+#endif
+
+#include "../strchrnul.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul.c b/sysdeps/x86_64/multiarch/strchrnul.c
new file mode 100644
index 0000000..4a4c55a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strchrnul.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+		       IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S
new file mode 100644
index 0000000..67726b6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __wcschr_avx2
+#define USE_AS_WCSCHR 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000..9a1b45a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S
@@ -0,0 +1,30 @@
+/* wcschr optimized with SSE2.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __wcschr __wcschr_sse2
+
+# undef weak_alias
+# define weak_alias(__wcschr, wcschr)
+# undef libc_hidden_def
+# define libc_hidden_def(__wcschr)
+# undef libc_hidden_weak
+# define libc_hidden_weak(wcschr)
+#endif
+
+#include "../wcschr.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr.c b/sysdeps/x86_64/multiarch/wcschr.c
new file mode 100644
index 0000000..70d3692
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wcschr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcschr __redirect_wcschr
+# define __wcschr __redirect___wcschr
+# include <wchar.h>
+# undef wcschr
+# undef __wcschr
+
+# define SYMBOL_NAME wcschr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_wcschr, __wcschr, IFUNC_SELECTOR ());
+weak_alias (__wcschr, wcschr);
+# ifdef SHARED
+__hidden_ver1 (__wcschr, __GI___wcschr, __redirect___wcschr)
+  __attribute__((visibility ("hidden")));
+__hidden_ver1 (wcschr, __GI_wcschr, __redirect_wcschr)
+  __attribute__((weak, visibility ("hidden")));
+# endif
+#endif

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                  |   20 +++
 sysdeps/x86_64/multiarch/Makefile          |    2 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c |   19 ++-
 sysdeps/x86_64/multiarch/strchr-avx2.S     |  254 ++++++++++++++++++++++++++++
 sysdeps/x86_64/multiarch/strchr-sse2.S     |   28 +++
 sysdeps/x86_64/multiarch/strchr.S          |   57 ------
 sysdeps/x86_64/multiarch/strchr.c          |   55 ++++++
 sysdeps/x86_64/multiarch/strchrnul-avx2.S  |    3 +
 sysdeps/x86_64/multiarch/strchrnul-sse2.S  |   26 +++
 sysdeps/x86_64/multiarch/strchrnul.c       |   34 ++++
 sysdeps/x86_64/multiarch/wcschr-avx2.S     |    3 +
 sysdeps/x86_64/multiarch/wcschr-sse2.S     |   30 ++++
 sysdeps/x86_64/multiarch/wcschr.c          |   39 +++++
 13 files changed, 512 insertions(+), 58 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/strchr-avx2.S
 create mode 100644 sysdeps/x86_64/multiarch/strchr-sse2.S
 delete mode 100644 sysdeps/x86_64/multiarch/strchr.S
 create mode 100644 sysdeps/x86_64/multiarch/strchr.c
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul-avx2.S
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul-sse2.S
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul.c
 create mode 100644 sysdeps/x86_64/multiarch/wcschr-avx2.S
 create mode 100644 sysdeps/x86_64/multiarch/wcschr-sse2.S
 create mode 100644 sysdeps/x86_64/multiarch/wcschr.c


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]