This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Need a way to test ifunc functions


  Hi!

On Wed, Nov 10, 2010 at 09:29:10AM -0800, H.J. Lu wrote:
> With IFUNC functions, glibc testsuite can only test one implementation.
> We need a way to test SSE2/SSSE3/SSE4 implementations on SSE4
> machines.

  For similar purposes, I wrote the patch below a few weeks ago
(it may not apply directly to current Git anymore). I chose the
approach to allow function-specific override of ifunc cpuid resolution
in order to be able to also test performance impact for real-world
applications.

  If there is some interest in having this upstream, it would be easy to
modify the patch so that this feature can optionally be enabled at
compile time.


--8<--

This table describes environment switches used to toggle various
functions.  The switches may be of several types:

	* Boolean - value "0" for false/disabled, non-zero for true

	* CPUID - may be used to override cpuid(1) registers by given
	  values.  For nice description of these registers, see e.g.

		http://www.flounder.com/cpuid_explorer2.htm#CPUID%281%29

	  Of particular interest are these bits:

	        (DX) SSE2       (1 << 26)
		(CX) SSSE3      (1 << 9)
		(CX) SSE4_1     (1 << 19)
		(CX) SSE4_2     (1 << 20)

	  Therefore, e.g. _IFUNC_CPUID_CX=0x80200 will enable SSSE3
	  and SSE4.1, but disable SSE4.2 (and every other CX feature
	  bit, since the whole register value is replaced). The SSE2
	  bit (in the DX register) will be left as-is (set based on
	  the current CPU).


Note that the patch currently modifies only x86_64 functions to take
heed of the function-specific switches.

The generic environment variables are as follows:

_IFUNC_CPUID_CX		CPUID	See above. Impacts all functions.
_IFUNC_CPUID_DX		CPUID	See above. Impacts all functions.
_IFUNC_FASTREP		Boolean	Toggles Fast_Rep_String. Assume
				rep-instructions are fast, tuned for
				i3/i5/i7. Impacts i686 version of
				bcopy(), bzero(), memcpy(), memmove(),
				mempcpy(), memset().
_IFUNC_FASTCPBACK	Boolean	Toggles Fast_Copy_Backward. Assume
				copy backwards is fast, tuned for
				i3/i5/i7. Impacts x86_64 version of
				memcpy() and mempcpy().
_IFUNC_SLOWBSF		Boolean	Toggles Slow_BSF. (Instruction Bit
				Scan Forward.) Tuned for Atom. Impacts
				x86_64 and i686 version of strlen().

Furthermore, function-specific environment variables are available.
All the variables above can be used, just with the function-specific
prefix listed below. E.g., you can use _IFUNC_STRCPY_CPUID_DX to change
the cpuid setup of DX just for the strcpy family of functions.

_IFUNC_STRCPY_*		strcpy(), strncpy(), stpcpy(), stpncpy(),
			memcpy(), mempcpy(), memmove()
_IFUNC_STRCMP_*		strcmp(), strncmp(), memcmp(), strcasecmp_l(),
			strncasecmp_l()
_IFUNC_STRSPN_*		strspn(), strcspn(), strpbrk()
_IFUNC_STRCHR_*		strchr(), strrchr(), rawmemchr()
_IFUNC_STRLEN_*		strlen()
_IFUNC_STRSTR_*		strstr(), strcasestr(), strcmp(), strncmp(),
			memcmp()



diff --git a/sysdeps/unix/sysv/linux/init-first.c b/sysdeps/unix/sysv/linux/init-first.c
index 7b2333d..6b8f3ff 100644
--- a/sysdeps/unix/sysv/linux/init-first.c
+++ b/sysdeps/unix/sysv/linux/init-first.c
@@ -28,6 +28,7 @@
 #include <libc-internal.h>
 
 #include <ldsodefs.h>
+#include "multiarch/init-arch.h"
 
 /* Set nonzero if we have to be prepared for more then one libc being
    used in the process.  Safe assumption if initializer never runs.  */
@@ -74,6 +75,9 @@ _init (int argc, char **argv, char **envp)
   __libc_argv = argv;
   __environ = envp;
 
+  /* After setting up __environ, force __cpu_features reset. */
+  __cpu_features.kind = arch_kind_unknown;
+
 #ifndef SHARED
   __libc_init_secure ();
 
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 786466d..f49549f 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -19,11 +19,19 @@
    02111-1307 USA.  */
 
 #include <atomic.h>
+#include <stdlib.h>
 #include <cpuid.h>
+#include <unistd.h>
 #include "init-arch.h"
 
 
 struct cpu_features __cpu_features attribute_hidden;
+struct cpu_features __cpu_features_strcpy attribute_hidden;
+struct cpu_features __cpu_features_strstr attribute_hidden;
+struct cpu_features __cpu_features_strchr attribute_hidden;
+struct cpu_features __cpu_features_strlen attribute_hidden;
+struct cpu_features __cpu_features_strspn attribute_hidden;
+struct cpu_features __cpu_features_strcmp attribute_hidden;
 
 
 static void
@@ -39,6 +47,25 @@ get_common_indeces (unsigned int *family, unsigned int *model)
   *model = (eax >> 4) & 0x0f;
 }
 
+char *
+bare_getenv(const char *name)
+{
+  char **ep;
+  if (!__environ) return NULL;
+  for (ep = __environ; *ep != NULL; ++ep)
+    {
+      const char *np = name; char *npe = *ep;
+      for (; *np && *npe && *npe != '='; np++, npe++)
+	if (*np != *npe)
+	  goto next;
+      if (*np || *npe != '=')
+	goto next;
+      return npe + 1;
+next:;
+    }
+  return NULL;
+}
+
 
 void
 __init_cpu_features (void)
@@ -109,6 +136,41 @@ __init_cpu_features (void)
   __cpu_features.model = model;
   atomic_write_barrier ();
   __cpu_features.kind = kind;
+
+  /* _Now_, we can safely call getenv(). */
+  const char *env;
+  if ((env = bare_getenv("_IFUNC_CPUID_CX")))
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx = strtol(env, NULL, 16);
+  if ((env = bare_getenv("_IFUNC_CPUID_DX")))
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx = strtol(env, NULL, 16);
+#define feattog(FEAT, ID, NAME) \
+  if ((env = bare_getenv("_IFUNC_" # NAME))) \
+    { \
+      if (*env == '0') \
+	FEAT.feature[index_##ID] &= ~bit_##ID; \
+      else \
+	FEAT.feature[index_##ID] |= bit_##ID; \
+    }
+  feattog(__cpu_features, Fast_Rep_String, FASTREP);
+  feattog(__cpu_features, Fast_Copy_Backward, FASTCPBACK);
+  feattog(__cpu_features, Slow_BSF, SLOWBSF);
+
+#define featfor(FEAT, NAME) \
+  memcpy(&FEAT, &__cpu_features, sizeof(FEAT)); \
+  if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_CX"))) \
+    FEAT.cpuid[COMMON_CPUID_INDEX_1].ecx = strtol(env, NULL, 16); \
+  if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_DX"))) \
+    FEAT.cpuid[COMMON_CPUID_INDEX_1].edx = strtol(env, NULL, 16); \
+  feattog(FEAT, Fast_Rep_String, FASTREP); \
+  feattog(FEAT, Fast_Copy_Backward, FASTCPBACK); \
+  feattog(FEAT, Slow_BSF, SLOWBSF);
+
+  featfor(__cpu_features_strcpy, STRCPY);
+  featfor(__cpu_features_strstr, STRSTR);
+  featfor(__cpu_features_strchr, STRCHR);
+  featfor(__cpu_features_strspn, STRSPN);
+  featfor(__cpu_features_strcmp, STRCMP);
+  featfor(__cpu_features_strlen, STRLEN);
 }
 
 #undef __get_cpu_features
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 783b020..b3ef9da 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -77,6 +77,14 @@ extern struct cpu_features
   unsigned int model;
   unsigned int feature[FEATURE_INDEX_MAX];
 } __cpu_features attribute_hidden;
+#define __cpu_features_common __cpu_features
+
+extern struct cpu_features __cpu_features_strcpy attribute_hidden;
+extern struct cpu_features __cpu_features_strstr attribute_hidden;
+extern struct cpu_features __cpu_features_strchr attribute_hidden;
+extern struct cpu_features __cpu_features_strlen attribute_hidden;
+extern struct cpu_features __cpu_features_strspn attribute_hidden;
+extern struct cpu_features __cpu_features_strcmp attribute_hidden;
 
 
 extern void __init_cpu_features (void) attribute_hidden;
@@ -91,35 +99,37 @@ extern const struct cpu_features *__get_cpu_features (void)
      __attribute__ ((const));
 
 # ifndef NOT_IN_libc
-#  define __get_cpu_features()	(&__cpu_features)
+#  define __get_cpu_features(family)	(&__cpu_features_ ## family)
+# else
+#  define __get_cpu_features(family)	__get_cpu_features()
 # endif
 
-# define HAS_CPU_FEATURE(idx, reg, bit) \
-  ((__get_cpu_features ()->cpuid[idx].reg & (1 << (bit))) != 0)
+# define HAS_CPU_FEATURE(family, idx, reg, bit) \
+  ((__get_cpu_features(family)->cpuid[idx].reg & (1 << (bit))) != 0)
 
 /* Following are the feature tests used throughout libc.  */
 
-# define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, 26)
-# define HAS_POPCOUNT	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 23)
-# define HAS_SSSE3	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 9)
-# define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
-# define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
-# define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_SSE2(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, edx, 26)
+# define HAS_POPCOUNT(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 23)
+# define HAS_SSSE3(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 9)
+# define HAS_SSE4_1(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 19)
+# define HAS_SSE4_2(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 20)
+# define HAS_FMA(family)	HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 12)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 
-#define HAS_ARCH_FEATURE(idx, bit) \
-  ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+#define HAS_ARCH_FEATURE(family, idx, bit) \
+  ((__get_cpu_features (family)->feature[idx] & (bit)) != 0)
 
-#define HAS_FAST_REP_STRING \
-  HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+#define HAS_FAST_REP_STRING(family) \
+  HAS_ARCH_FEATURE (family, index_Fast_Rep_String, bit_Fast_Rep_String)
 
-#define HAS_FAST_COPY_BACKWARD \
-  HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
+#define HAS_FAST_COPY_BACKWARD(family) \
+  HAS_ARCH_FEATURE (family, index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
 
-#define HAS_SLOW_BSF \
-  HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+#define HAS_SLOW_BSF(family) \
+  HAS_ARCH_FEATURE (family, index_Slow_BSF, bit_Slow_BSF)
 
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
index 301ab28..679ae7a 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp.S
@@ -26,11 +26,11 @@
 	.text
 ENTRY(memcmp)
 	.type	memcmp, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
+	cmpl	$0, KIND_OFFSET+__cpu_features_strcmp(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__memcmp_sse2(%rip), %rax
-	testl	$bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+	testl	$bit_SSE4_1, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_1(%rip)
 	jz	2f
 	leaq	__memcmp_sse4_1(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8e9fb19..9b15c53 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -28,14 +28,14 @@
 	.text
 ENTRY(memcpy)
 	.type	memcpy, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
+	cmpl	$0, KIND_OFFSET+__cpu_features_strcpy(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__memcpy_sse2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__memcpy_ssse3(%rip), %rax
-	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+	testl	$bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__memcpy_ssse3_back(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 948f61c..797c44d 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -29,14 +29,14 @@
 	.text
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
+	cmpl	$0, KIND_OFFSET+__cpu_features_strcpy(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__memcpy_chk_sse2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__memcpy_chk_ssse3(%rip), %rax
-	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+	testl	$bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__memcpy_chk_ssse3_back(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 3798627..0b383f0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -38,8 +38,8 @@ extern __typeof (memmove) __memmove_ssse3_back attribute_hidden;
 
 #ifndef NOT_IN_libc
 libc_ifunc (memmove,
-	    HAS_SSSE3
-	    ? (HAS_FAST_COPY_BACKWARD
+	    HAS_SSSE3(strcpy)
+	    ? (HAS_FAST_COPY_BACKWARD(strcpy)
 	       ? __memmove_ssse3_back : __memmove_ssse3)
 	    : __memmove_sse2);
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 962501d..05939c8 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -29,7 +29,7 @@ extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
 #include "debug/memmove_chk.c"
 
 libc_ifunc (__memmove_chk,
-	    HAS_SSSE3
-	    ? (HAS_FAST_COPY_BACKWARD
+	    HAS_SSSE3(strcpy)
+	    ? (HAS_FAST_COPY_BACKWARD(strcpy)
 	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
 	    : __memmove_chk_sse2);
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index e8152d6..ce70cfa 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -27,14 +27,14 @@
 #if defined SHARED && !defined NOT_IN_libc
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
+	cmpl	$0, KIND_OFFSET+__cpu_features_strcpy(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__mempcpy_sse2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__mempcpy_ssse3(%rip), %rax
-	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+	testl	$bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__mempcpy_ssse3_back(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 024c775..cf7df6e 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -29,14 +29,14 @@
 	.text
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
+	cmpl	$0, KIND_OFFSET+__cpu_features_strcpy(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__mempcpy_chk_sse2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__mempcpy_chk_ssse3(%rip), %rax
-	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+	testl	$bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__mempcpy_chk_ssse3_back(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
index 2a8a690..c866088 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr.S
@@ -26,11 +26,11 @@
 	.text
 ENTRY(rawmemchr)
 	.type	rawmemchr, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strchr+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__rawmemchr_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__rawmemchr_sse42(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c
index cfecf9b..39e940e 100644
--- a/sysdeps/x86_64/multiarch/s_fma.c
+++ b/sysdeps/x86_64/multiarch/s_fma.c
@@ -34,7 +34,7 @@ __fma_fma (double x, double y, double z)
   return x;
 }
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+libm_ifunc (__fma, HAS_FMA(common) ? __fma_fma : __fma_sse2);
 weak_alias (__fma, fma)
 
 # define __fma __fma_sse2
diff --git a/sysdeps/x86_64/multiarch/s_fmaf.c b/sysdeps/x86_64/multiarch/s_fmaf.c
index de1c4b6..a70836f 100644
--- a/sysdeps/x86_64/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/multiarch/s_fmaf.c
@@ -33,7 +33,7 @@ __fmaf_fma (float x, float y, float z)
   return x;
 }
 
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+libm_ifunc (__fmaf, HAS_FMA(common) ? __fmaf_fma : __fmaf_sse2);
 weak_alias (__fmaf, fmaf)
 
 # define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index fde6dcc..bc5335f 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -34,4 +34,4 @@
 #undef __sched_cpucount
 
 libc_ifunc (__sched_cpucount,
-	    HAS_POPCOUNT ? popcount_cpucount : generic_cpucount);
+	    HAS_POPCOUNT(common) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/multiarch/strcasestr-c.c b/sysdeps/x86_64/multiarch/strcasestr-c.c
index 551492d..577f876 100644
--- a/sysdeps/x86_64/multiarch/strcasestr-c.c
+++ b/sysdeps/x86_64/multiarch/strcasestr-c.c
@@ -9,7 +9,7 @@ extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
 
 #if 1
 libc_ifunc (__strcasestr,
-	    HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2);
+	    HAS_SSE4_2(strstr) ? __strcasestr_sse42 : __strcasestr_sse2);
 #else
 libc_ifunc (__strcasestr,
 	    0 ? __strcasestr_sse42 : __strcasestr_sse2);
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index 27eead9..f2f8db8 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -26,11 +26,11 @@
 	.text
 ENTRY(strchr)
 	.type	strchr, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strchr+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strchr_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strchr_sse42(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index 54e5bbc..661cf76 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -86,15 +86,15 @@
 	.text
 ENTRY(STRCMP)
 	.type	STRCMP, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strcmp+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:
 	leaq	STRCMP_SSE42(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
 	jnz	2f
 	leaq	STRCMP_SSSE3(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
 	jnz	2f
 	leaq	STRCMP_SSE2(%rip), %rax
 2:	ret
@@ -103,15 +103,15 @@ END(STRCMP)
 # ifdef USE_AS_STRCASECMP_L
 ENTRY(__strcasecmp)
 	.type	__strcasecmp, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strcmp+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:
 	leaq	__strcasecmp_sse42(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strcasecmp_ssse3(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
 	jnz	2f
 	leaq	__strcasecmp_sse2(%rip), %rax
 2:	ret
@@ -121,15 +121,15 @@ weak_alias (__strcasecmp, strcasecmp)
 # ifdef USE_AS_STRNCASECMP_L
 ENTRY(__strncasecmp)
 	.type	__strncasecmp, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strcmp+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:
 	leaq	__strncasecmp_sse42(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strncasecmp_ssse3(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip)
 	jnz	2f
 	leaq	__strncasecmp_sse2(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 02fa8d0..bd4408d 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -59,11 +59,11 @@
 	.text
 ENTRY(STRCPY)
 	.type	STRCPY, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strcpy+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	STRCPY_SSE2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+	testl	$bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	STRCPY_SSSE3(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
index f00d52c..7ad325d 100644
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ b/sysdeps/x86_64/multiarch/strcspn.S
@@ -45,11 +45,11 @@
 	.text
 ENTRY(STRCSPN)
 	.type	STRCSPN, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strspn+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	STRCSPN_SSE2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	STRCSPN_SSE42(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
index 83a88ec..d5d251c 100644
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ b/sysdeps/x86_64/multiarch/strlen.S
@@ -29,15 +29,15 @@
 	.text
 ENTRY(strlen)
 	.type	strlen, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strlen+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strlen_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strlen+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strlen_sse42(%rip), %rax
 	ret
-2:	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+2:	testl	$bit_Slow_BSF, __cpu_features_strlen+FEATURE_OFFSET+index_Slow_BSF(%rip)
 	jz	3f
 	leaq    __strlen_no_bsf(%rip), %rax
 3:	ret
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 0d17fdb..51728da 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -28,11 +28,11 @@
 	.text
 ENTRY(strrchr)
 	.type	strrchr, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strchr+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strrchr_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strrchr_sse42(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S
index 1c56626..9a72bfa 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/strspn.S
@@ -30,11 +30,11 @@
 	.text
 ENTRY(strspn)
 	.type	strspn, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	cmpl	$0, __cpu_features_strspn+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strspn_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+	testl	$bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strspn_sse42(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strstr-c.c b/sysdeps/x86_64/multiarch/strstr-c.c
index b8ed316..677dc9a 100644
--- a/sysdeps/x86_64/multiarch/strstr-c.c
+++ b/sysdeps/x86_64/multiarch/strstr-c.c
@@ -12,4 +12,4 @@
 extern char *__strstr_sse42 (const char *, const char *) attribute_hidden;
 extern __typeof (__strstr_sse2) __strstr_sse2 attribute_hidden;
 
-libc_ifunc (strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2);
+libc_ifunc (strstr, HAS_SSE4_2(strstr) ? __strstr_sse42 : __strstr_sse2);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]