This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFC: Rewrite x86-64 IFUNC selector in C



On 28/05/2017 13:26, H.J. Lu wrote:
> On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>> On 25/05/2017 18:38, H.J. Lu wrote:
>>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote:
>>>> Ok, I'll get started then.
>>>>
>>>> Are there any general comments about the attached conversion for
>>>> memcpy?  Just so I don't repeat the same wrong thing many times.
>>>
>>> You missed:
>>>
>>> /* Define multiple versions only for the definition in lib and for
>>>    DSO.  In static binaries we need memcpy before the initialization
>>>    happened.  */
>>> #if defined SHARED && IS_IN (libc)
>>>
>>> +typedef void * (*memcpy_fn)(void *, const void *, size_t);
>>> +
>>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void *src, size_t n);
>>>
>>> Please use something similar to multiarch/strstr.c:
>>>
>>> /* Redefine strstr so that the compiler won't complain about the type
>>>    mismatch with the IFUNC selector in strong_alias, below.  */
>>> #undef  strstr
>>> #define strstr __redirect_strstr
>>> #include <string.h>
>>> #undef  strstr
>>> ...
>>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
>>>
>>> +/* Defined in cacheinfo.c */
>>> +extern long int __x86_shared_cache_size attribute_hidden;
>>> +extern long int __x86_shared_cache_size_half attribute_hidden;
>>> +extern long int __x86_data_cache_size attribute_hidden;
>>> +extern long int __x86_data_cache_size_half attribute_hidden;
>>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden;
>>
>> It seems these will be used not only for memcpy, so I would suggest adding
>> them to a common header in multiarch.
>>
>>>
>>> Remove them.
>>> static void * select_memcpy_impl(void) {
>>> +  const struct cpu_features* cpu_features_struct_p = __get_cpu_features ();
>>> +
>>> +  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) {
>>> +    return __memcpy_erms;
>>> +  }
>>> +
>>> +  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) {
>>> +    if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER))
>>> +      return __memcpy_avx512_unaligned_erms;
>>> +    return __memcpy_avx512_unaligned;
>>> +  }
>>> +
>>> +  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) {
>>> +    if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>> +      return __memcpy_avx_unaligned_erms;
>>> +
>>> +    }
>>> +    return __memcpy_avx_unaligned;
>>> +  }
>>> +  else {
>>> +    if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) {
>>> +      if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>> +        return __memcpy_sse2_unaligned_erms;
>>> +
>>> +      }
>>> +      return __memcpy_sse2_unaligned;
>>> +    }
>>> +    else {
>>> +      if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) {
>>> +        return __memcpy_sse2_unaligned;
>>> +
>>> +      }
>>> +      if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) {
>>> +        return __memcpy_ssse3_back;
>>> +
>>> +      }
>>> +      return __memcpy_ssse3;
>>> +    }
>>> +  }
>>> +}
>>>
>>> Please
>>>
>>> 1. Fix formatting.
>>> 2. Remove unnecessary {}.
>>> 3. Don't use "else".
>>>
>>> +void *__new_memcpy(void *dest, const void *src, size_t n)
>>> +  __attribute__ ((ifunc ("select_memcpy_impl")));
>>>
>>> Use "typeof" here.
>>
>> We have the libc_ifunc{,_redirected} macros to handle the
>> __attribute__ ((ifunc)) support from the compiler.  I think you can use:
>>
>> # include <string.h>
>>
>> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden;
>>
>> static void *memcpy_selector (void)
>> {
>>   // fill me.
>> }
>>
>> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector);
>> libc_hidden_def (memcpy)
> 
> Here is my take.  It only covers memcpy and mempcpy.   Please
> extend it to memmove as well as *_chk functions.
> 

I think we can simplify it further and use the existing ifunc macros in
libc-symbols.h.  Also, for memmove I think we can organize the code better (at
least for ifunc) and build an extra object with a more meaningful name.  I used
your logic for the ifunc selection and extended it to memmove as well.

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3736f54..b6179aa 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -7,6 +7,7 @@ ifeq ($(subdir),string)
 sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   strcmp-sse2-unaligned strncmp-ssse3 \
 		   memcmp-sse4 memcpy-ssse3 \
+		   mem-impls \
 		   memmove-ssse3 \
 		   memcpy-ssse3-back \
 		   memmove-ssse3-back \
diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/mem-impls.S
similarity index 52%
rename from sysdeps/x86_64/multiarch/memmove.S
rename to sysdeps/x86_64/multiarch/mem-impls.S
index 8c534e8..5e74fa0 100644
--- a/sysdeps/x86_64/multiarch/memmove.S
+++ b/sysdeps/x86_64/multiarch/mem-impls.S
@@ -1,6 +1,5 @@
-/* Multiple versions of memmove
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* Multiple versions of memmove, memcpy, and mempcpy.
+   Copyright (C) 2017 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,57 +16,6 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  */
-#if IS_IN (libc)
-	.text
-ENTRY(__libc_memmove)
-	.type	__libc_memmove, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	lea	__memmove_erms(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_ERMS)
-	jnz	2f
-	HAS_ARCH_FEATURE (Prefer_No_AVX512)
-	jnz	1f
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	lea	__memmove_avx512_no_vzeroupper(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jnz	2f
-	lea	__memmove_avx512_unaligned_erms(%rip), %RAX_LP
-	HAS_CPU_FEATURE (ERMS)
-	jnz	2f
-	lea	__memmove_avx512_unaligned(%rip), %RAX_LP
-	ret
-1:	lea	__memmove_avx_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	L(Fast_Unaligned_Load)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__memmove_avx_unaligned_erms(%rip), %RAX_LP
-	ret
-L(Fast_Unaligned_Load):
-	lea	__memmove_sse2_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
-	jz	L(SSSE3)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__memmove_sse2_unaligned_erms(%rip), %RAX_LP
-	ret
-L(SSSE3):
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	lea    __memmove_ssse3_back(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jnz	2f
-	lea	__memmove_ssse3(%rip), %RAX_LP
-2:	ret
-END(__libc_memmove)
-#endif
-
 #if IS_IN (libc)
 # define MEMMOVE_SYMBOL(p,s)	p##_sse2_##s
 
@@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy)
    The speedup we get from using SSE2 instructions is likely eaten away
    by the indirect call in the PLT.  */
 #  define libc_hidden_builtin_def
+# else
+strong_alias (__memmove_sse2_unaligned, memmove)
 # endif
-strong_alias (__libc_memmove, memmove)
 #endif
 
 #if !defined SHARED || !IS_IN (libc)
 weak_alias (__mempcpy, mempcpy)
 #endif
 
-#include "../memmove.S"
+#include <sysdeps/x86_64/memmove.S>
 
 #if defined SHARED && IS_IN (libc)
 # include <shlib-compat.h>
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
deleted file mode 100644
index af27703..0000000
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of memcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need memcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(__new_memcpy)
-	.type	__new_memcpy, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	lea	__memcpy_erms(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_ERMS)
-	jnz	2f
-	HAS_ARCH_FEATURE (Prefer_No_AVX512)
-	jnz	1f
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jnz	2f
-	lea	__memcpy_avx512_unaligned_erms(%rip), %RAX_LP
-	HAS_CPU_FEATURE (ERMS)
-	jnz	2f
-	lea	__memcpy_avx512_unaligned(%rip), %RAX_LP
-	ret
-1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	L(Fast_Unaligned_Load)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__memcpy_avx_unaligned_erms(%rip), %RAX_LP
-	ret
-L(Fast_Unaligned_Load):
-	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
-	jz	L(SSSE3)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__memcpy_sse2_unaligned_erms(%rip), %RAX_LP
-	ret
-L(SSSE3):
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	lea    __memcpy_ssse3_back(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jnz	2f
-	lea	__memcpy_ssse3(%rip), %RAX_LP
-2:	ret
-END(__new_memcpy)
-
-# undef memcpy
-# include <shlib-compat.h>
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
new file mode 100644
index 0000000..ad1b31f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -0,0 +1,35 @@
+/* Multiple versions of memcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined SHARED && IS_IN (libc)
+
+# define memcpy __redirect_memcpy
+# include <string.h>
+# undef memcpy
+
+# define SYMBOL_NAME memcpy
+# include "memifunc.h"
+
+extern __typeof (__redirect_memcpy) __new_memcpy;
+
+libc_ifunc (__new_memcpy, memcpy_ifunc_selector ());
+
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif
diff --git a/sysdeps/x86_64/multiarch/memifunc.h b/sysdeps/x86_64/multiarch/memifunc.h
new file mode 100644
index 0000000..894b4a0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memifunc.h
@@ -0,0 +1,95 @@
+/* Common definition for memcpy, mempcpy, and memmove implementation.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* memcpy, mempcpy, and memmove share the same logic for ifunc selection.  */
+
+#include <cpu-features.h>
+#include <ldsodefs.h>
+
+#define PASTER1(x,y)        x ## _ ## y
+#define EVALUATOR1(x,y)     PASTER1(x,y)
+#define PASTER2(x,y)       __ ## x ## _ ## y
+#define EVALUATOR2(x,y)    PASTER2(x,y)
+
+/* Basically set '__redirect_<symbol>' to use as type definition,
+   '__<symbol>_<variant>' as the optimized implementation and
+   '<symbol>_ifunc_selector' as the IFUNC selector.  */
+#define REDIRECT_NAME   EVALUATOR1(__redirect, SYMBOL_NAME)
+#define OPTIMIZE(name)  EVALUATOR2(SYMBOL_NAME, name)
+#define IFUNC_NAME	EVALUATOR1(SYMBOL_NAME, ifunc_selector)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper)
+  attribute_hidden;
+
+static inline void *
+IFUNC_NAME (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+    return OPTIMIZE(erms);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+    {
+      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE(avx512_no_vzeroupper);
+
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE(avx512_unaligned_erms);
+
+      return OPTIMIZE(avx512_unaligned);
+    }
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    {
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE(avx_unaligned_erms);
+
+      return OPTIMIZE(avx_unaligned);
+    }
+
+  if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+      || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+    {
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE(sse2_unaligned_erms);
+
+      return OPTIMIZE(sse2_unaligned);
+    }
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+    return OPTIMIZE(ssse3_back);
+
+  return OPTIMIZE(ssse3);
+}
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
new file mode 100644
index 0000000..76372fc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memmove.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined SHARED && IS_IN (libc)
+
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
+
+# define SYMBOL_NAME memmove
+# include "memifunc.h"
+
+extern __typeof (__redirect_memmove) __libc_memmove;
+
+libc_ifunc (__libc_memmove, memmove_ifunc_selector ());
+strong_alias (__libc_memmove, memmove);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
deleted file mode 100644
index b8b2b28..0000000
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Multiple versions of mempcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need mempcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(__mempcpy)
-	.type	__mempcpy, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	lea	__mempcpy_erms(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_ERMS)
-	jnz	2f
-	HAS_ARCH_FEATURE (Prefer_No_AVX512)
-	jnz	1f
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	lea	__mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jnz	2f
-	lea	__mempcpy_avx512_unaligned_erms(%rip), %RAX_LP
-	HAS_CPU_FEATURE (ERMS)
-	jnz	2f
-	lea	__mempcpy_avx512_unaligned(%rip), %RAX_LP
-	ret
-1:	lea	__mempcpy_avx_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	L(Fast_Unaligned_Load)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__mempcpy_avx_unaligned_erms(%rip), %RAX_LP
-	ret
-L(Fast_Unaligned_Load):
-	lea	__mempcpy_sse2_unaligned(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
-	jz	L(SSSE3)
-	HAS_CPU_FEATURE (ERMS)
-	jz	2f
-	lea	__mempcpy_sse2_unaligned_erms(%rip), %RAX_LP
-	ret
-L(SSSE3):
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	lea    __mempcpy_ssse3_back(%rip), %RAX_LP
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jnz	2f
-	lea	__mempcpy_ssse3(%rip), %RAX_LP
-2:	ret
-END(__mempcpy)
-
-weak_alias (__mempcpy, mempcpy)
-#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
new file mode 100644
index 0000000..e59bde2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -0,0 +1,34 @@
+/* Multiple versions of mempcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined SHARED && IS_IN (libc)
+
+# define mempcpy __redirect_mempcpy
+# define __mempcpy __redirect___mempcpy
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+
+# define SYMBOL_NAME mempcpy
+# include "memifunc.h"
+
+libc_ifunc_redirected (__redirect_mempcpy, __mempcpy,
+		       mempcpy_ifunc_selector ());
+weak_alias (__mempcpy, mempcpy)
+#endif
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]