This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: memcpy performance regressions 2.19 -> 2.24(5)


I was also thinking that it might be nice to have a TUNABLE that sets
the implementation of memcpy directly.  It would be easier to do this
if memcpy.S was memcpy.c.  Attached is a patch that does the
conversion but doesn't add the tunables.  How would you feel about
this?  It has no runtime impact, probably increases the size slightly,
and makes the code easier to read / modify.

On Mon, May 22, 2017 at 8:19 PM, Erich Elsen <eriche@google.com> wrote:
> Here is the patch that slightly refactors how init_cacheinfo is called.
>
> On Mon, May 22, 2017 at 7:24 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, May 22, 2017 at 6:23 PM, Erich Elsen <eriche@google.com> wrote:
>>> I definitely think increasing the size in the case of processors with
>>> a large number of cores makes sense.  Hopefully with some testing we
>>> can confirm it is a net win and/or find a more empirical number.
>>>
>>> Thanks for that patch with the tunable support.  I've just put a
>>> similar patch in review for sharing right now.  It adds support in the
>>> case that HAVE_TUNABLES isn't defined like the similar code in arena.c
>>>  and also makes a minor change that turns init_cacheinfo into a
>>> init_cacheinfo_impl (a hidden callable).  init_cacheinfo is now a
>>> constructor that just calls the impl and passes the cpu_features
>>> struct.  This is useful in that it makes the code a bit more modular
>>> (something that we'll need to be able to test this internally).
>>
>> This sounds a good idea.  I'd also like to add tunable support in
>> init_cpu_features to turn on/off CPU features.   non_temporal_threshold
>> will be one of them.
>>
>>
>> --
>> H.J.
From a2957f5a0b21f9588e8756228b11b86f886b0f4c Mon Sep 17 00:00:00 2001
From: Erich Elsen <eriche@google.com>
Date: Tue, 23 May 2017 12:29:24 -0700
Subject: [PATCH] add memcpy.c

---
 sysdeps/x86_64/multiarch/memcpy.c | 70 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 sysdeps/x86_64/multiarch/memcpy.c

diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
new file mode 100644
index 0000000000..b0ff8c71fd
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -0,0 +1,70 @@
+#include "cpu-features.h"
+#include "init-arch.h"
+#include "shlib-compat.h"
+#include <stdlib.h>
+
+typedef void * (*memcpy_fn)(void *, const void *, size_t);
+
+extern void * __memcpy_erms(void *dest, const void *src, size_t n);
+extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n);
+extern void * __memcpy_sse2_unaligned_erms(void *dest, const void *src, size_t n);
+extern void * __memcpy_ssse3(void *dest, const void *src, size_t n);
+extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n);
+extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n);
+extern void * __memcpy_avx_unaligned_erms(void *dest, const void *src, size_t n);
+extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n);
+extern void * __memcpy_avx512_unaligned_erms(void *dest, const void *src, size_t n);
+
+/* Defined in cacheinfo.c */
+extern long int __x86_shared_cache_size attribute_hidden;
+extern long int __x86_shared_cache_size_half attribute_hidden;
+extern long int __x86_data_cache_size attribute_hidden;
+extern long int __x86_data_cache_size_half attribute_hidden;
+extern long int __x86_shared_non_temporal_threshold attribute_hidden;
+
+static void * select_memcpy_impl(void) {
+  const struct cpu_features* cpu_features_struct_p = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) {
+    return __memcpy_erms;
+  }
+
+  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) {
+    if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER))
+      return __memcpy_avx512_unaligned_erms;
+    return __memcpy_avx512_unaligned;
+  }
+
+  if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) {
+    if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
+      return __memcpy_avx_unaligned_erms;
+
+    }
+    return __memcpy_avx_unaligned;
+  }
+  else {
+    if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) {
+      if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
+        return __memcpy_sse2_unaligned_erms;
+
+      }
+      return __memcpy_sse2_unaligned;
+    }
+    else {
+      if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) {
+        return __memcpy_sse2_unaligned;
+
+      }
+      if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) {
+        return __memcpy_ssse3_back;
+
+      }
+      return __memcpy_ssse3;
+    }
+  }
+}
+
+void *__new_memcpy(void *dest, const void *src, size_t n)
+  __attribute__ ((ifunc ("select_memcpy_impl")));
+
+versioned_symbol(libc, __new_memcpy, memcpy, GLIBC_2_14);
-- 
2.13.0.219.gdb65acc882-goog


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]