This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: RFC: Rewrite x86-64 IFUNC selector in C
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- Cc: Erich Elsen <eriche at google dot com>, Siddhesh Poyarekar <siddhesh at gotplt dot org>, "Carlos O'Donell" <carlos at redhat dot com>, GNU C Library <libc-alpha at sourceware dot org>
- Date: Sun, 28 May 2017 09:26:25 -0700
- Subject: Re: RFC: Rewrite x86-64 IFUNC selector in C
- Authentication-results: sourceware.org; auth=none
- References: <CAMe9rOq7fCtNSfhQN=QXGjSRkKNfWwC4c9c_kqb4iFbpmNYBEA@mail.gmail.com> <f07f563b-e74f-e2ec-38f5-5f092f73f490@gotplt.org> <4a16e1e8-9baf-7b75-41b0-e25a127c649a@linaro.org> <CAOVZoAOYJ7zNWRZkgEj2Pq=v=GHs2j4XkuFw8077M3vZnxBy0w@mail.gmail.com> <CAMe9rOrDDH==5E5dmLV2Z=mENNBo_u9VfSZpB=Lp_qUtQ3spvg@mail.gmail.com> <264ee0ca-ee55-297b-ac16-2761c77e0bfc@linaro.org>
On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 25/05/2017 18:38, H.J. Lu wrote:
>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote:
>>> Ok, I'll get started then.
>>>
>>> Are there any general comments about the attached conversion for
>>> memcpy? Just so I don't repeat the same wrong thing many times.
>>
>> You missed:
>>
>> /* Define multiple versions only for the definition in lib and for
>> DSO. In static binaries we need memcpy before the initialization
>> happened. */
>> #if defined SHARED && IS_IN (libc)
>>
>> +typedef void * (*memcpy_fn)(void *, const void *, size_t);
>> +
>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void
>> *src, size_t n);
>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void
>> *src, size_t n);
>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n);
>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void
>> *src, size_t n);
>>
>> Please use something similar to multiarch/strstr.c:
>>
>> /* Redefine strstr so that the compiler won't complain about the type
>> mismatch with the IFUNC selector in strong_alias, below. */
>> #undef strstr
>> #define strstr __redirect_strstr
>> #include <string.h>
>> #undef strstr
>> ...
>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
>>
>> +/* Defined in cacheinfo.c */
>> +extern long int __x86_shared_cache_size attribute_hidden;
>> +extern long int __x86_shared_cache_size_half attribute_hidden;
>> +extern long int __x86_data_cache_size attribute_hidden;
>> +extern long int __x86_data_cache_size_half attribute_hidden;
>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden;
>
> It seems these will be used not only for memcpy, so I would suggest adding
> them to a common header in multiarch.
>
>>
>> Remove them.
>> static void * select_memcpy_impl(void) {
>> + const struct cpu_features* cpu_features_struct_p = __get_cpu_features ();
>> +
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) {
>> + return __memcpy_erms;
>> + }
>> +
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) {
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER))
>> + return __memcpy_avx512_unaligned_erms;
>> + return __memcpy_avx512_unaligned;
>> + }
>> +
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) {
>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>> + return __memcpy_avx_unaligned_erms;
>> +
>> + }
>> + return __memcpy_avx_unaligned;
>> + }
>> + else {
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) {
>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>> + return __memcpy_sse2_unaligned_erms;
>> +
>> + }
>> + return __memcpy_sse2_unaligned;
>> + }
>> + else {
>> + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) {
>> + return __memcpy_sse2_unaligned;
>> +
>> + }
>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) {
>> + return __memcpy_ssse3_back;
>> +
>> + }
>> + return __memcpy_ssse3;
>> + }
>> + }
>> +}
>>
>> Please
>>
>> 1. Fix formatting.
>> 2. Remove unnecessary {}.
>> 3. Don't use "else".
>>
>> +void *__new_memcpy(void *dest, const void *src, size_t n)
>> + __attribute__ ((ifunc ("select_memcpy_impl")));
>>
>> Use "typeof" here.
>
> We have libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support
> from the compiler. I think you can use:
>
> # include <string.h>
>
> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden;
>
> static void *memcpy_selector (void)
> {
> // fill me.
> }
>
> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector);
> libc_hidden_def (memcpy)
Here is my take. It only covers memcpy and mempcpy. Please
extend it to memmove as well as the *_chk functions.
--
H.J.
From 81b43f3b2e5b9c613f219f6e072b051a83ed4c8a Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 28 May 2017 09:21:22 -0700
Subject: [PATCH] Add memmove.h
---
sysdeps/x86_64/multiarch/memcpy.S | 75 -------------------------------
sysdeps/x86_64/multiarch/memcpy.c | 40 +++++++++++++++++
sysdeps/x86_64/multiarch/memmove.h | 91 ++++++++++++++++++++++++++++++++++++++
sysdeps/x86_64/multiarch/mempcpy.S | 73 ------------------------------
sysdeps/x86_64/multiarch/mempcpy.c | 39 ++++++++++++++++
5 files changed, 170 insertions(+), 148 deletions(-)
delete mode 100644 sysdeps/x86_64/multiarch/memcpy.S
create mode 100644 sysdeps/x86_64/multiarch/memcpy.c
create mode 100644 sysdeps/x86_64/multiarch/memmove.h
delete mode 100644 sysdeps/x86_64/multiarch/mempcpy.S
create mode 100644 sysdeps/x86_64/multiarch/mempcpy.c
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
deleted file mode 100644
index af27703..0000000
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need memcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__new_memcpy)
- .type __new_memcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __memcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __memcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __memcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __memcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __memcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__new_memcpy)
-
-# undef memcpy
-# include <shlib-compat.h>
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
new file mode 100644
index 0000000..fa0e6a2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -0,0 +1,40 @@
+/* Multiple versions of memcpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in lib and for
+ DSO. In static binaries we need memcpy before the initialization
+ happened. */
+#if defined SHARED && IS_IN (libc)
+# define IFUNC_SELECTOR(type_name) \
+ __typeof (type_name) __new_memcpy \
+ __attribute__ ((ifunc ("memcpy_ifunc"))); \
+ static inhibit_stack_protector void *memcpy_ifunc (void)
+# define REDIRECT_NAME __redirect_memcpy
+# define OPTIMIZE(name) __memcpy_##name
+
+# define memcpy __redirect_memcpy
+# include <string.h>
+# undef memcpy
+
+# include "memmove.h"
+# include <init-arch.h>
+
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove.h b/sysdeps/x86_64/multiarch/memmove.h
new file mode 100644
index 0000000..f4d2be5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove.h
@@ -0,0 +1,91 @@
+/* Multiple versions of memmove
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef IFUNC_SELECTOR
+# define IFUNC_SELECTOR(type_name) \
+ __typeof (type_name) __libc_memmove \
+ __attribute__ ((ifunc ("memmove_ifunc"))); \
+ static inhibit_stack_protector void *memmove_ifunc (void)
+# define REDIRECT_NAME __redirect_memmove
+# define OPTIMIZE(name) __memmove_##name
+#endif
+
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+
+IFUNC_SELECTOR (REDIRECT_NAME)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+ return OPTIMIZE (erms);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx512_no_vzeroupper);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
+
+ return OPTIMIZE (avx512_unaligned);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx_unaligned_erms);
+
+ return OPTIMIZE (avx_unaligned);
+ }
+
+ if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+ || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (sse2_unaligned_erms);
+
+ return OPTIMIZE (sse2_unaligned);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+ return OPTIMIZE (ssse3_back);
+
+ return OPTIMIZE (ssse3);
+}
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
deleted file mode 100644
index b8b2b28..0000000
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Multiple versions of mempcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need mempcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __mempcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __mempcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __mempcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __mempcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __mempcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__mempcpy)
-
-weak_alias (__mempcpy, mempcpy)
-#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
new file mode 100644
index 0000000..b4669dc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -0,0 +1,39 @@
+/* Multiple versions of mempcpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in lib and for
+ DSO. In static binaries we need mempcpy before the initialization
+ happened. */
+#if defined SHARED && IS_IN (libc)
+# define IFUNC_SELECTOR(type_name) \
+ __typeof (type_name) __mempcpy \
+ __attribute__ ((ifunc ("mempcpy_ifunc"))); \
+ static inhibit_stack_protector void *mempcpy_ifunc (void)
+# define REDIRECT_NAME __redirect_mempcpy
+# define OPTIMIZE(name) __mempcpy_##name
+
+# define __mempcpy __redirect_mempcpy
+# include <string.h>
+# undef __mempcpy
+
+# include "memmove.h"
+# include <init-arch.h>
+
+weak_alias (__mempcpy, mempcpy)
+#endif
--
2.9.4