This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: Check GLIBC_IFUNC to enable/disable ifunc features


On Tue, Jun 28, 2016 at 7:28 PM, Mike Frysinger <vapier@gentoo.org> wrote:
> should have a comment or two mentioning why you need to code all of
> these things ad-hoc
>
> also needs a manual update
>

I am not sure if it needs manual update.  I consider it more for glibc
developers than for normal end users.

> seems like the feature compare is case-sensitive ?  but some of the
> feature strings are inconsistent case ?  would be nice if they were
> standardized (like all lower case).
> -mike

I'd like to match the ones in cpu-features.h so that one can cut and
paste from it.

Here is the updated patch.  Any more feedbacks?

Thanks.


-- 
H.J.
From 471248bb2d34ed027ed06487210db8dcd93cbe1b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 27 Jun 2016 15:13:50 -0700
Subject: [PATCH] Add environment variable, GLIBC_IFUNC, to control IFUNC
 selection

The current IFUNC selection is based on microbenchmarks in glibc.  It
should give the best performance for most workloads.  But other choices
may have better performance for a particular workload or the hardware
is too new to make the best choice.  We provide an environment variable,
GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., to enable CPU/ARCH feature yyy,
disable CPU/ARCH feature yyy and zzz, where the feature name is
case-sensitive and has to match the ones in cpu-features.h.  It can be
used by glibc developers to override the IFUNC selection to improve
performance for a particular workload or tune for a new processor.  It
isn't intended for normal end users.

	* sysdeps/i386/dl-machine.h (dl_platform_init): Pass the
	array of environment strings to init_cpu_features.
	* sysdeps/x86/libc-start.c (__libc_start_main): Likewise.
	* sysdeps/x86/cpu-features.c (equal): New function.
	(CHECK_GLIBC_IFUNC_CPU_OFF): New macro.
	(CHECK_GLIBC_IFUNC_ARCH_OFF): Likewise.
	(CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH): Likewise.
	(CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH): Likewise.
	(init_cpu_features): Updated to take the array of environment
	strings.  Process GLIBC_IFUNC environment variable.
---
 sysdeps/i386/dl-machine.h   |   3 +-
 sysdeps/x86/cpu-features.c  | 245 +++++++++++++++++++++++++++++++++++++++++++-
 sysdeps/x86/libc-start.c    |   2 +-
 sysdeps/x86_64/dl-machine.h |   3 +-
 4 files changed, 249 insertions(+), 4 deletions(-)

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 4e3968a..7584931 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -240,7 +240,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9ce4b49..0e5765e 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -91,8 +91,139 @@ get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+#ifdef __x86_64__
+typedef long long op_t;
+#else
+typedef int op_t;
+#endif
+
+/* Return true if the first LEN bytes of strings A and B are the same
+   where LEN != 0.  We don't use string/memory functions since they may
+   not be available here and this version is faster for its usage.  */
+
+static bool
+equal (const char *a, const char *b, size_t len)
+{
+  size_t op_len = len % sizeof (op_t);
+  if (op_len)
+    {
+      switch (op_len)
+	{
+	case 1:
+	  if (*(char *) a != *(char *) b)
+	    return false;
+	  break;
+	case 2:
+	  if (*(short *) a != *(short *) b)
+	    return false;
+	  break;
+	case 3:
+	  if (*(short *) a != *(short *) b
+	      || *(char *) (a + 2) != *(char *) (b + 2))
+	    return false;
+	  break;
+#ifdef __x86_64__
+	case 4:
+	  if (*(int *) a != *(int *) b)
+	    return false;
+	  break;
+	default:
+	  if (*(int *) a != *(int *) b
+	      || *(int *) (a + op_len - 4) != *(int *) (b + op_len - 4))
+	    return false;
+	  break;
+#else
+	default:
+	  break;
+#endif
+	}
+      /* Align length to size of op_t.  */
+      len -= op_len;
+      if (len == 0)
+	return true;
+    }
+
+  /* Compare one op_t at a time.  */
+  do
+    {
+      if (*(op_t *) a != *(op_t *) b)
+	return false;
+      len -= sizeof (op_t);
+      if (len == 0)
+	return true;
+      a += sizeof (op_t);
+      b += sizeof (op_t);
+    }
+  while (1);
+}
+
+/* Disable a CPU feature by setting "name=0".  We don't enable a CPU
+   feature which isn't availble.  */
+#define CHECK_GLIBC_IFUNC_CPU_OFF(name)					\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->cpuid[index_cpu_##name].reg_##name		\
+	  &= ~bit_cpu_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  We don't enable an
+   ARCH feature which isn't availble or has security implication.  */
+#define CHECK_GLIBC_IFUNC_ARCH_OFF(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      break;								\
+    }
+
+/* Enable/disable an ARCH feature by setting "name=1"/"name=0".  */
+#define CHECK_GLIBC_IFUNC_ARCH_BOTH(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1')				\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  Enable an ARCH feature
+   by setting "name=1" if the ARCH feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_ARCH_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  Enable an ARCH feature
+   by setting "name=1" if the CPU feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_CPU_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
 static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features (struct cpu_features *cpu_features, char **env)
 {
   unsigned int ebx, ecx, edx;
   unsigned int family = 0;
@@ -268,4 +399,116 @@ no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+  if (env == NULL)
+    return;
+
+  /* The current IFUNC selection is based on microbenchmarks in glibc.
+     It should give the best performance for most workloads.  But other
+     choices may have better performance for a particular workload or
+     the hardware is too new to make the best choice.  We provide an
+     environment variable, GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., to enable
+     CPU/ARCH feature yyy, disable CPU/ARCH feature yyy and zzz, where
+     the feature name is case-sensitive and has to match the ones in
+     cpu-features.h.  It can be used by glibc developers to override
+     the IFUNC selection to improve performance for a particular
+     workload or tune for a new processor.  */
+  while (*env != NULL)
+    {
+      const char *p, *end;
+      size_t len = 13;
+      end = *env;
+      for (p = end; *p != '\0'; p++)
+	if (--len == 0 && equal (end, "GLIBC_IFUNC=", 12))
+	  {
+	    len = strlen (p);
+	    end = p + len;
+	    do
+	      {
+		const char *c;
+		for (c = p; *c != ':'; c++)
+		  if (c >= end)
+		    break;
+		len = c - p;
+		switch (len)
+		  {
+		  default:
+		    break;
+		  case 5:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CX8);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (HTT);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (RTM);
+		    break;
+		  case 6:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX2);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CMOV);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (ERMS);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA4);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE2);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I586);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I686);
+		    break;
+		  case 7:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSSE3);
+		    break;
+		  case 8:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_1);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_2);
+		    break;
+		  case 9:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512F);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (OSXSAVE);
+		    break;
+		  case 10:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512DQ);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (POPCOUNT);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Slow_BSF);
+		    break;
+		  case 12:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA_Usable);
+		    break;
+		  case 13:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
+							  SSE4_2);
+		    break;
+		  case 15:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512F_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (AVX_Fast_Unaligned_Load, AVX_Usable);
+		    break;
+		  case 17:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512DQ_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String);
+		    break;
+		  case 20:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward);
+		    break;
+		  case 21:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Load);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Copy);
+		    break;
+		  case 22:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (Prefer_No_VZEROUPPER, AVX_Usable);
+		    break;
+		  case 23:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (Prefer_MAP_32BIT_EXEC);
+		    break;
+		  case 28:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
+		      (Prefer_PMINUB_for_stringop, SSE2);
+		    break;
+		  }
+		p += len + 1;
+	      }
+	    while (p < end);
+	    return;
+	  }
+      env++;
+    }
 }
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
index 3b5ea6e..7dec1ca 100644
--- a/sysdeps/x86/libc-start.c
+++ b/sysdeps/x86/libc-start.c
@@ -34,7 +34,7 @@ __libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		   void (*fini) (void),
 		   void (*rtld_fini) (void), void *stack_end)
 {
-  init_cpu_features (&_dl_x86_cpu_features);
+  init_cpu_features (&_dl_x86_cpu_features, &argv[argc + 1]);
   return generic_start_main (main, argc, argv, init, fini, rtld_fini,
 			     stack_end);
 }
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ed0c1a8..071a2e1 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -227,7 +227,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]