This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] [BZ #20071] X86 ld.so uses the incorrect cache sizes


There are private copies of cache info in ld.so, libc.so and libc.a.
They provide faster and consistent access to cache info for string and
memory functions.  This patch adds cache info to _dl_x86_cpu_features
and initializes it early together with other CPU features.  It allows
a processor to override cache info derived from CPUID and initializes
private copies of cache info in ld.so, libc.so and libc.a from
_dl_x86_cpu_features.

Tested on x86 and x86-64.  Any feedback or comments?


H.J.
--
	[BZ #20071]
	* sysdeps/x86/cacheinfo.c (__cache_sysconf): Define only in
	libc.
	(__x86_data_cache_size_half): Initialize to 0.
	(__x86_data_cache_size): Likewise.
	(__x86_raw_data_cache_size_half): Likewise.
	(__x86_raw_data_cache_size): Likewise.
	(__x86_shared_cache_size_half): Likewise.
	(__x86_shared_cache_size): Likewise.
	(__x86_raw_shared_cache_size_half): Likewise.
	(__x86_raw_shared_cache_size): Likewise.
	(init_x86_cacheinfo): New function.
	(__init_cpu_features_cacheinfo): Likewise.
	(init_cacheinfo): Only define in libc.so.  Just call
	init_x86_cacheinfo.
	* sysdeps/x86/cpu-features.c (init_cpu_features): Call
	__init_cpu_features_cacheinfo.
	* sysdeps/x86/cpu-features.h (cache_info): New.
	(cpu_features): Add cache.
---
 sysdeps/x86/cacheinfo.c    | 104 +++++++++++++++++++++++++++++++++------------
 sysdeps/x86/cpu-features.c |   2 +
 sysdeps/x86/cpu-features.h |  26 ++++++++++++
 3 files changed, 106 insertions(+), 26 deletions(-)

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 143b333..d4b29c5 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -429,6 +429,7 @@ handle_amd (int name)
 }
 
 
+#if IS_IN (libc)
 /* Get the value of the system variable NAME.  */
 long int
 attribute_hidden
@@ -445,24 +446,25 @@ __cache_sysconf (int name)
   /* CPU not known, we have no information.  */
   return 0;
 }
+#endif
 
 
 /* Data cache size for use in memory and string routines, typically
    L1 size, rounded to multiple of 256 bytes.  */
-long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
-long int __x86_data_cache_size attribute_hidden = 32 * 1024;
+long int __x86_data_cache_size_half attribute_hidden;
+long int __x86_data_cache_size attribute_hidden;
 /* Similar to __x86_data_cache_size_half, but not rounded.  */
-long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
+long int __x86_raw_data_cache_size_half attribute_hidden;
 /* Similar to __x86_data_cache_size, but not rounded.  */
-long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
+long int __x86_raw_data_cache_size attribute_hidden;
 /* Shared cache size for use in memory and string routines, typically
    L2 or L3 size, rounded to multiple of 256 bytes.  */
-long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
+long int __x86_shared_cache_size_half attribute_hidden;
+long int __x86_shared_cache_size attribute_hidden;
 /* Similar to __x86_shared_cache_size_half, but not rounded.  */
-long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
+long int __x86_raw_shared_cache_size_half attribute_hidden;
 /* Similar to __x86_shared_cache_size, but not rounded.  */
-long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
+long int __x86_raw_shared_cache_size attribute_hidden;
 
 /* Threshold to use non temporal store.  */
 long int __x86_shared_non_temporal_threshold attribute_hidden;
@@ -473,9 +475,29 @@ int __x86_prefetchw attribute_hidden;
 #endif
 
 
-static void
-__attribute__((constructor))
-init_cacheinfo (void)
+/* Initialize private copy of cache info in ld.so, libc.so or libc.a
+   from _dl_x86_cpu_features for faster and consistent access.  */
+
+static inline void
+init_x86_cacheinfo (const struct cache_info *cache)
+{
+  __x86_data_cache_size_half = cache->data_size_half;
+  __x86_data_cache_size = cache->data_size;
+  __x86_raw_data_cache_size_half = cache->raw_data_size_half;
+  __x86_raw_data_cache_size = cache->raw_data_size;
+  __x86_shared_cache_size_half = cache->shared_size_half;
+  __x86_shared_cache_size = cache->shared_size;
+  __x86_raw_shared_cache_size_half = cache->raw_shared_size_half;
+  __x86_raw_shared_cache_size = cache->raw_shared_size;
+  __x86_shared_non_temporal_threshold
+    = cache->shared_non_temporal_threshold;
+}
+
+#if IS_IN (rtld) || !defined SHARED
+/* Initialize cache info in _dl_x86_cpu_features in ld.so or libc.a.  */
+
+void
+__init_cpu_features_cacheinfo (struct cache_info *cache)
 {
   /* Find out what brand of processor.  */
   unsigned int eax;
@@ -646,28 +668,58 @@ init_cacheinfo (void)
 #endif
     }
 
-  if (data > 0)
+  /* Default data cache size to 32KB.  */
+  if (data <= 0)
+    data = 32 * 1024;
+
+  if (cache->raw_data_size == 0)
     {
-      __x86_raw_data_cache_size_half = data / 2;
-      __x86_raw_data_cache_size = data;
-      /* Round data cache size to multiple of 256 bytes.  */
-      data = data & ~255L;
-      __x86_data_cache_size_half = data / 2;
-      __x86_data_cache_size = data;
+      cache->raw_data_size_half = data / 2;
+      cache->raw_data_size = data;
     }
+  else
+    data = cache->raw_data_size;
+
+  /* Round data cache size to multiple of 256 bytes.  */
+  data = data & ~255L;
+  cache->data_size_half = data / 2;
+  cache->data_size = data;
+
+  /* Default shared cache size to 1MB if no positive size was found.  */
+  if (shared <= 0)
+    shared = 1024 * 1024;
 
-  if (shared > 0)
+  if (cache->raw_shared_size == 0)
     {
-      __x86_raw_shared_cache_size_half = shared / 2;
-      __x86_raw_shared_cache_size = shared;
-      /* Round shared cache size to multiple of 256 bytes.  */
-      shared = shared & ~255L;
-      __x86_shared_cache_size_half = shared / 2;
-      __x86_shared_cache_size = shared;
+      cache->raw_shared_size_half = shared / 2;
+      cache->raw_shared_size = shared;
     }
+  else
+    shared = cache->raw_shared_size;
+
+  /* Round shared cache size to multiple of 256 bytes.  */
+  shared = shared & ~255L;
+  cache->shared_size_half = shared / 2;
+  cache->shared_size = shared;
 
   /* The large memcpy micro benchmark in glibc shows that 6 times of
      shared cache size is the approximate value above which non-temporal
      store becomes faster.  */
-  __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
+  if (cache->shared_non_temporal_threshold == 0)
+    cache->shared_non_temporal_threshold = cache->shared_size * 6;
+
+  /* Initialize private copy of cache info in ld.so or libc.a.  */
+  init_x86_cacheinfo (cache);
 }
+#endif
+
+#if IS_IN (libc) && defined SHARED
+/* Initialize private copy of cache info in libc.so.  */
+
+static void
+__attribute__((constructor))
+init_cacheinfo (void)
+{
+  init_x86_cacheinfo (&GLRO(dl_x86_cpu_features).cache);
+}
+#endif
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index a5fa81f..be9af5d 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -263,4 +263,6 @@ no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+  __init_cpu_features_cacheinfo (&cpu_features->cache);
 }
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 9529d61..04cb9e1 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -164,6 +164,28 @@
 
 #else	/* __ASSEMBLER__ */
 
+struct cache_info
+{
+  /* Data cache size for use in memory and string routines, typically
+     L1 size, rounded to multiple of 256 bytes.  */
+  long int data_size_half;
+  long int data_size;
+  /* Similar to data_size_half, but not rounded.  */
+  long int raw_data_size_half;
+  /* Similar to data_size, but not rounded.  */
+  long int raw_data_size;
+  /* Shared cache size for use in memory and string routines, typically
+     L2 or L3 size, rounded to multiple of 256 bytes.  */
+  long int shared_size_half;
+  long int shared_size;
+  /* Similar to shared_size_half, but not rounded.  */
+  long int raw_shared_size_half;
+  /* Similar to shared_size, but not rounded.  */
+  long int raw_shared_size;
+  /* Threshold to use non temporal store.  */
+  long int shared_non_temporal_threshold;
+};
+
 enum
   {
     COMMON_CPUID_INDEX_1 = 0,
@@ -193,8 +215,12 @@ struct cpu_features
   unsigned int family;
   unsigned int model;
   unsigned int feature[FEATURE_INDEX_MAX];
+  struct cache_info cache;
 };
 
+extern void __init_cpu_features_cacheinfo (struct cache_info *)
+  attribute_hidden;
+
 /* Used from outside of glibc to get access to the CPU features
    structure.  */
 extern const struct cpu_features *__get_cpu_features (void)
-- 
2.5.5


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]