This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/cpuid/master created. glibc-2.21-669-g734d442
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 31 Jul 2015 23:56:10 -0000
- Subject: GNU C Library master sources branch hjl/cpuid/master created. glibc-2.21-669-g734d442
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch hjl/cpuid/master has been created
at 734d442158bf9ff7532f80081eb016f9f10718aa (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=734d442158bf9ff7532f80081eb016f9f10718aa
commit 734d442158bf9ff7532f80081eb016f9f10718aa
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Jul 31 13:46:05 2015 -0700
Use LOAD_RTLD_GLOBAL_RO_RDX and HAS_XXX in libmvec
diff --git a/math/Makefile b/math/Makefile
index 6388bae..d3b483d 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -263,7 +263,7 @@ $(objpfx)libieee.a: $(objpfx)ieee-math.o
$(addprefix $(objpfx),$(filter-out $(tests-static) $(libm-vec-tests),$(tests))): $(libm)
$(addprefix $(objpfx),$(tests-static)): $(objpfx)libm.a
$(addprefix $(objpfx), $(libm-vec-tests)): $(objpfx)%: $(libm) $(libmvec) \
- $(objpfx)init-arch.o $(objpfx)%-wrappers.o
+ $(objpfx)%-wrappers.o
gmp-objs = $(patsubst %,$(common-objpfx)stdlib/%.o,\
add_n sub_n cmp addmul_1 mul_1 mul_n divmod_1 \
diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h
index e8833bf..0de4cd8 100644
--- a/sysdeps/x86_64/fpu/math-tests-arch.h
+++ b/sysdeps/x86_64/fpu/math-tests-arch.h
@@ -19,66 +19,36 @@
#if defined REQUIRE_AVX
# include <init-arch.h>
-/* Set to 1 if AVX supported. */
-static int avx_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx_usable = __cpu_features.feature[index_AVX_Usable] \
- & bit_AVX_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx_usable) return; \
+ if (!HAS_AVX) return; \
} \
while (0)
#elif defined REQUIRE_AVX2
# include <init-arch.h>
- /* Set to 1 if AVX2 supported. */
- static int avx2_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx2_usable = __cpu_features.feature[index_AVX2_Usable] \
- & bit_AVX2_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx2_usable) return; \
+ if (!HAS_AVX2) return; \
} \
while (0)
#elif defined REQUIRE_AVX512F
# include <init-arch.h>
- /* Set to 1 if supported. */
- static int avx512f_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx512f_usable = __cpu_features.feature[index_AVX512F_Usable] \
- & bit_AVX512F_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx512f_usable) return; \
+ if (!HAS_AVX512F) return; \
} \
while (0)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
index 5f67d83..74305fb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_cos)
.type _ZGVbN2v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_cos_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_cos_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2v_cos_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
index 5babb83..5ac3d0e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_cos)
.type _ZGVdN4v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_cos_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_cos_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
index d0f4f27..7d8f31c 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_cos)
.type _ZGVeN8v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
1: leaq _ZGVeN8v_cos_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8v_cos_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
index ef3dc49..1d625ae 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_exp)
.type _ZGVbN2v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_exp_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_exp_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2v_exp_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
index 7f2ebde..a80702b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_exp)
.type _ZGVdN4v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_exp_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_exp_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
index 7b7c07d..3389c89 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_exp)
.type _ZGVeN8v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_exp_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_exp_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8v_exp_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
index 38d369f..4f9d990 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_log)
.type _ZGVbN2v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_log_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_log_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2v_log_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
index ddb6105..594adf6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_log)
.type _ZGVdN4v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_log_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_log_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
index 76375fd..ca22197 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_log)
.type _ZGVeN8v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_log_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_log_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8v_log_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
index f111388..49f1fb9 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2vv_pow)
.type _ZGVbN2vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2vv_pow_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2vv_pow_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
index 21e3070..dff294f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4vv_pow)
.type _ZGVdN4vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4vv_pow_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4vv_pow_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
index c1e5e76..197925b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8vv_pow)
.type _ZGVeN8vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8vv_pow_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8vv_pow_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8vv_pow_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
index 29bd0a7..80bd858 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_sin)
.type _ZGVbN2v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_sin_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_sin_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2v_sin_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
index c3a453a..861c9b3 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_sin)
.type _ZGVdN4v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_sin_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_sin_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
index 131f2f4..3482ac5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_sin)
.type _ZGVeN8v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_sin_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_sin_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8v_sin_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
index e8e5771..8ae0903 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2vvv_sincos)
.type _ZGVbN2vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
index 64744ff..671e8fc 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4vvv_sincos)
.type _ZGVdN4vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
index e331090..24922e1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8vvv_sincos)
.type _ZGVeN8vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8vvv_sincos_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8vvv_sincos_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN8vvv_sincos_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
index 0654d3c..fdd640c 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_cosf)
.type _ZGVeN16v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_cosf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_cosf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16v_cosf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
index fa2363b..b9b2210 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_cosf)
.type _ZGVbN4v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_cosf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_cosf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
index e14bba4..b9589b3 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_cosf)
.type _ZGVdN8v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_cosf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_cosf_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
index 62858eb..6a1fdbb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_expf)
.type _ZGVeN16v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_expf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_expf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16v_expf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
index 37d38bc..6ad7841 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_expf)
.type _ZGVbN4v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_expf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_expf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4v_expf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
index e3dc1b1..a5e1917 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_expf)
.type _ZGVdN8v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_expf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_expf_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
index 68c57e4..3d2b8b1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_logf)
.type _ZGVeN16v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_logf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_logf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16v_logf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
index 153ed8e..a8dd898 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_logf)
.type _ZGVbN4v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_logf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_logf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4v_logf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
index 6f50bf6..f5356d8 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_logf)
.type _ZGVdN8v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_logf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_logf_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
index 3aa9f95..3d32202 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16vv_powf)
.type _ZGVeN16vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16vv_powf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16vv_powf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16vv_powf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
index f88b9ca..94f172c 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4vv_powf)
.type _ZGVbN4vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4vv_powf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4vv_powf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
index 4552e57..3618adf 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8vv_powf)
.type _ZGVdN8vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8vv_powf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8vv_powf_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
index bdcabab..f20df2f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16vvv_sincosf)
.type _ZGVeN16vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
index 610046b..a83c830 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4vvv_sincosf)
.type _ZGVbN4vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
index 9e5be67..a20772b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8vvv_sincosf)
.type _ZGVdN8vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
index 3ec78a0..25ec834 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_sinf)
.type _ZGVeN16v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_sinf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_sinf_skx(%rip), %rax
+ HAS_AVX512DQ
jnz 2f
leaq _ZGVeN16v_sinf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_AVX512F
jnz 2f
leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
index cf1e4df..4a71052 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_sinf)
.type _ZGVbN4v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_sinf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_sinf_sse4(%rip), %rax
+ HAS_SSE4_1
jz 2f
ret
2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
index b28bf3c..e14c5b2 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_sinf)
.type _ZGVdN8v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ HAS_AVX2
jz 2f
ret
2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=837b2ffe7c264acc384c81125b90f7457ba0efc4
commit 837b2ffe7c264acc384c81125b90f7457ba0efc4
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Jul 31 16:52:19 2015 -0700
Use LOAD_GOT_AND_RTLD_GLOBAL_RO and HAS_XXX in i686/multiarch
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
index e767d97..7657082 100644
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ b/sysdeps/i386/i686/multiarch/bcopy.S
@@ -23,51 +23,24 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(bcopy)
.type bcopy, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __bcopy_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__bcopy_ia32)
+ HAS_SSE2
jz 2f
- leal __bcopy_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __bcopy_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__bcopy_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(bcopy)
-# else
- .text
-ENTRY(bcopy)
- .type bcopy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __bcopy_ia32, %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __bcopy_ssse3, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
- jz 2f
- leal __bcopy_ssse3_rep, %eax
+ LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep)
2: ret
END(bcopy)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
index e8dc85f..ac142bc 100644
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ b/sysdeps/i386/i686/multiarch/bzero.S
@@ -23,46 +23,19 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(__bzero)
- .type __bzero, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __bzero_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __bzero_sse2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __bzero_sse2_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(__bzero)
-# else
.text
ENTRY(__bzero)
.type __bzero, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __bzero_ia32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__bzero_ia32)
+ HAS_SSE2
jz 2f
- leal __bzero_sse2, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+ LOAD_FUNC_GOT_EAX ( __bzero_sse2)
+ HAS_FAST_REP_STRING
jz 2f
- leal __bzero_sse2_rep, %eax
+ LOAD_FUNC_GOT_EAX (__bzero_sse2_rep)
2: ret
END(__bzero)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S
index 02994d0..e444dd6 100644
--- a/sysdeps/i386/i686/multiarch/memchr.S
+++ b/sysdeps/i386/i686/multiarch/memchr.S
@@ -22,46 +22,22 @@
#include <init-arch.h>
#if IS_IN (libc)
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
.text
ENTRY(__memchr)
.type __memchr, @gnu_indirect_function
- pushl %ebx
- CFI_PUSH (%ebx)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_SSE2
jz 2f
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_BSF
jz 3f
- leal __memchr_sse2@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+ LOAD_FUNC_GOT_EAX ( __memchr_sse2)
ret
- CFI_PUSH (%ebx)
-
-2: leal __memchr_ia32@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+2: LOAD_FUNC_GOT_EAX (__memchr_ia32)
ret
- CFI_PUSH (%ebx)
-
-3: leal __memchr_sse2_bsf@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+3: LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf)
ret
END(__memchr)
diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
index 6b607eb..aa08900 100644
--- a/sysdeps/i386/i686/multiarch/memcmp.S
+++ b/sysdeps/i386/i686/multiarch/memcmp.S
@@ -23,46 +23,19 @@
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(memcmp)
- .type memcmp, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memcmp_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __memcmp_ssse3@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __memcmp_sse4_2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(memcmp)
-# else
.text
ENTRY(memcmp)
.type memcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __memcmp_ia32, %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcmp_ia32)
+ HAS_SSSE3
jz 2f
- leal __memcmp_ssse3, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+ LOAD_FUNC_GOT_EAX (__memcmp_ssse3)
+ HAS_SSE4_2
jz 2f
- leal __memcmp_sse4_2, %eax
+ LOAD_FUNC_GOT_EAX (__memcmp_sse4_2)
2: ret
END(memcmp)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
index c6d20bd..d92f691 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/memcpy.S
@@ -28,29 +28,20 @@
.text
ENTRY(memcpy)
.type memcpy, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memcpy_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcpy_ia32)
+ HAS_SSE2
jz 2f
- leal __memcpy_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __memcpy_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memcpy_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memcpy_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep)
+2: ret
END(memcpy)
# undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
index 9399587..ba99478 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -29,29 +29,20 @@
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32)
+ HAS_SSE2
jz 2f
- leal __memcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __memcpy_chk_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep)
+2: ret
END(__memcpy_chk)
# else
# include "../memcpy_chk.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
index 7033463..6a4a5de 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/memmove.S
@@ -23,37 +23,28 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(memmove)
.type memmove, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memmove_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memmove_ia32)
+ HAS_SSE2
jz 2f
- leal __memmove_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __memmove_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memmove_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memmove_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep)
+2: ret
END(memmove)
-# undef ENTRY
-# define ENTRY(name) \
+# ifdef SHARED
+# undef ENTRY
+# define ENTRY(name) \
.type __memmove_ia32, @function; \
.p2align 4; \
.globl __memmove_ia32; \
@@ -61,29 +52,8 @@ END(memmove)
__memmove_ia32: cfi_startproc; \
CALL_MCOUNT
# else
- .text
-ENTRY(memmove)
- .type memmove, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __memmove_ia32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
- jz 2f
- leal __memmove_sse2_unaligned, %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __memmove_ssse3, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
- jz 2f
- leal __memmove_ssse3_rep, %eax
-2: ret
-END(memmove)
-
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __memmove_ia32, @function; \
.globl __memmove_ia32; \
.p2align 4; \
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
index 2b576d4..83a4402 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -23,56 +23,26 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(__memmove_chk)
.type __memmove_chk, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memmove_chk_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ia32)
+ HAS_SSE2
jz 2f
- leal __memmove_chk_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __memmove_chk_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memmove_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(__memmove_chk)
-# else
- .text
-ENTRY(__memmove_chk)
- .type __memmove_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __memmove_chk_ia32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
- jz 2f
- leal __memmove_chk_sse2_unaligned, %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __memmove_chk_ssse3, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
- jz 2f
- leal __memmove_chk_ssse3_rep, %eax
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep)
2: ret
END(__memmove_chk)
+# ifndef SHARED
.type __memmove_chk_sse2_unaligned, @function
.p2align 4;
__memmove_chk_sse2_unaligned:
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
index 39c934e..810d4c2 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -28,29 +28,20 @@
.text
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __mempcpy_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__mempcpy_ia32)
+ HAS_SSE2
jz 2f
- leal __mempcpy_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __mempcpy_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__mempcpy_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __mempcpy_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep)
+2: ret
END(__mempcpy)
# undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
index b6fa202..a770bc9 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -29,29 +29,20 @@
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __mempcpy_chk_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32)
+ HAS_SSE2
jz 2f
- leal __mempcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal __mempcpy_chk_ssse3@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3)
+ HAS_FAST_REP_STRING
jz 2f
- leal __mempcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep)
+2: ret
END(__mempcpy_chk)
# else
# include "../mempcpy_chk.S"
diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S
index 321e0b7..5121a7c 100644
--- a/sysdeps/i386/i686/multiarch/memrchr.S
+++ b/sysdeps/i386/i686/multiarch/memrchr.S
@@ -22,46 +22,22 @@
#include <init-arch.h>
#if IS_IN (libc)
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
.text
ENTRY(__memrchr)
.type __memrchr, @gnu_indirect_function
- pushl %ebx
- CFI_PUSH (%ebx)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_SSE2
jz 2f
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_BSF
jz 3f
- leal __memrchr_sse2@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+ LOAD_FUNC_GOT_EAX (__memrchr_sse2)
ret
- CFI_PUSH (%ebx)
-
-2: leal __memrchr_ia32@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+2: LOAD_FUNC_GOT_EAX (__memrchr_ia32)
ret
- CFI_PUSH (%ebx)
-
-3: leal __memrchr_sse2_bsf@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+3: LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf)
ret
END(__memrchr)
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
index 6d7d919..1cf40c2 100644
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ b/sysdeps/i386/i686/multiarch/memset.S
@@ -23,46 +23,19 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(memset)
- .type memset, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memset_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __memset_sse2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal __memset_sse2_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(memset)
-# else
.text
ENTRY(memset)
.type memset, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __memset_ia32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memset_ia32)
+ HAS_SSE2
jz 2f
- leal __memset_sse2, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+ LOAD_FUNC_GOT_EAX (__memset_sse2)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memset_sse2_rep, %eax
+ LOAD_FUNC_GOT_EAX (__memset_sse2_rep)
2: ret
END(memset)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
index a770c0d..1418853 100644
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ b/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -23,50 +23,26 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __memset_chk_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
+ HAS_SSE2
jz 2f
- leal __memset_chk_sse2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__memset_chk_sse2)
+ HAS_FAST_REP_STRING
jz 2f
- leal __memset_chk_sse2_rep@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep)
+2: ret
END(__memset_chk)
+# ifdef SHARED
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
.string "memset used with constant zero length parameter; this could be due to transposed parameters"
# else
.text
-ENTRY(__memset_chk)
- .type __memset_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __memset_chk_ia32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
- jz 2f
- leal __memset_chk_sse2, %eax
- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
- jz 2f
- leal __memset_chk_sse2_rep, %eax
-2: ret
-END(__memset_chk)
-
.type __memset_chk_sse2, @function
.p2align 4;
__memset_chk_sse2:
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S
index c2b7ee6..7616460 100644
--- a/sysdeps/i386/i686/multiarch/rawmemchr.S
+++ b/sysdeps/i386/i686/multiarch/rawmemchr.S
@@ -22,46 +22,22 @@
#include <init-arch.h>
#if IS_IN (libc)
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
.text
ENTRY(__rawmemchr)
.type __rawmemchr, @gnu_indirect_function
- pushl %ebx
- CFI_PUSH (%ebx)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_SSE2
jz 2f
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_BSF
jz 3f
- leal __rawmemchr_sse2@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+ LOAD_FUNC_GOT_EAX (__rawmemchr_sse2)
ret
- CFI_PUSH (%ebx)
-
-2: leal __rawmemchr_ia32@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+2: LOAD_FUNC_GOT_EAX (__rawmemchr_ia32)
ret
- CFI_PUSH (%ebx)
-
-3: leal __rawmemchr_sse2_bsf@GOTOFF(%ebx), %eax
- popl %ebx
- CFI_POP (%ebx)
+3: LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf)
ret
END(__rawmemchr)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
index c30ac3a..7ace685 100644
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -20,49 +20,20 @@
#include <sysdep.h>
#include <init-arch.h>
-#ifdef SHARED
.text
ENTRY(__strcasecmp)
.type __strcasecmp, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strcasecmp_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strcasecmp_ia32)
+ HAS_SSSE3
jz 2f
- leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3)
+ HAS_SSE4_2
jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_SSE4_2
jnz 2f
- leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(__strcasecmp)
-#else
- .text
-ENTRY(__strcasecmp)
- .type __strcasecmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __strcasecmp_ia32, %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __strcasecmp_ssse3, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
- jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
- jnz 2f
- leal __strcasecmp_sse4_2, %eax
+ LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2)
2: ret
END(__strcasecmp)
-#endif
weak_alias (__strcasecmp, strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
index 474f753..e8e8f29 100644
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ b/sysdeps/i386/i686/multiarch/strcat.S
@@ -45,52 +45,22 @@
need strncat before the initialization happened. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal STRCAT_IA32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal STRCAT_SSE2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal STRCAT_SSSE3@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(STRCAT)
-# else
-
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal STRCAT_IA32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCAT_IA32)
+ HAS_SSE2
jz 2f
- leal STRCAT_SSE2, %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+ LOAD_FUNC_GOT_EAX (STRCAT_SSE2)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ HAS_SSSE3
jz 2f
- leal STRCAT_SSSE3, %eax
+ LOAD_FUNC_GOT_EAX (STRCAT_SSSE3)
2: ret
END(STRCAT)
-# endif
-
# undef ENTRY
# define ENTRY(name) \
.type STRCAT_IA32, @function; \
diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S
index 45624fd..83d2b84 100644
--- a/sysdeps/i386/i686/multiarch/strchr.S
+++ b/sysdeps/i386/i686/multiarch/strchr.S
@@ -25,24 +25,15 @@
.text
ENTRY(strchr)
.type strchr, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strchr_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strchr_ia32)
+ HAS_SSE2
jz 2f
- leal __strchr_sse2_bsf@GOTOFF(%ebx), %eax
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf)
+ HAS_SLOW_BSF
jz 2f
- leal __strchr_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__strchr_sse2)
+2: ret
END(strchr)
# undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
index 9df4008..274c7b3 100644
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -51,50 +51,21 @@
define multiple versions for strncmp in static library since we
need strncmp before the initialization happened. */
#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(STRCMP)
.type STRCMP, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __STRCMP_IA32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__STRCMP_IA32)
+ HAS_SSSE3
jz 2f
- leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3)
+ HAS_SSE4_2
jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_SSE4_2
jnz 2f
- leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(STRCMP)
-# else
- .text
-ENTRY(STRCMP)
- .type STRCMP, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __STRCMP_IA32, %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __STRCMP_SSSE3, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
- jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
- jnz 2f
- leal __STRCMP_SSE4_2, %eax
+ LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2)
2: ret
END(STRCMP)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S
index c279d46..c3844a8 100644
--- a/sysdeps/i386/i686/multiarch/strcpy.S
+++ b/sysdeps/i386/i686/multiarch/strcpy.S
@@ -61,52 +61,22 @@
need strncpy before the initialization happened. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(STRCPY)
.type STRCPY, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal STRCPY_IA32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCPY_IA32)
+ HAS_SSE2
jz 2f
- leal STRCPY_SSE2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (STRCPY_SSE2)
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ HAS_SSSE3
jz 2f
- leal STRCPY_SSSE3@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(STRCPY)
-# else
-
-ENTRY(STRCPY)
- .type STRCPY, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal STRCPY_IA32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
- jz 2f
- leal STRCPY_SSE2, %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal STRCPY_SSSE3, %eax
+ LOAD_FUNC_GOT_EAX (STRCPY_SSSE3)
2: ret
END(STRCPY)
-# endif
-
# undef ENTRY
# define ENTRY(name) \
.type STRCPY_IA32, @function; \
diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S
index e6ea454..7e67f78 100644
--- a/sysdeps/i386/i686/multiarch/strcspn.S
+++ b/sysdeps/i386/i686/multiarch/strcspn.S
@@ -42,40 +42,16 @@
define multiple versions for strpbrk in static library since we
need strpbrk before the initialization happened. */
#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(STRCSPN)
.type STRCSPN, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal STRCSPN_IA32@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCSPN_IA32)
+ HAS_SSE4_2
jz 2f
- leal STRCSPN_SSE42@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
-END(STRCSPN)
-# else
- .text
-ENTRY(STRCSPN)
- .type STRCSPN, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal STRCSPN_IA32, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
- jz 2f
- leal STRCSPN_SSE42, %eax
+ LOAD_FUNC_GOT_EAX (STRCSPN_SSE42)
2: ret
END(STRCSPN)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
index 2e6993b..8a2fbf2 100644
--- a/sysdeps/i386/i686/multiarch/strlen.S
+++ b/sysdeps/i386/i686/multiarch/strlen.S
@@ -28,24 +28,15 @@
.text
ENTRY(strlen)
.type strlen, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strlen_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strlen_ia32)
+ HAS_SSE2
jz 2f
- leal __strlen_sse2_bsf@GOTOFF(%ebx), %eax
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf)
+ HAS_SLOW_BSF
jz 2f
- leal __strlen_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__strlen_sse2)
+2: ret
END(strlen)
# undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
index c2cb03c..5025477 100644
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ b/sysdeps/i386/i686/multiarch/strncase.S
@@ -20,49 +20,20 @@
#include <sysdep.h>
#include <init-arch.h>
-#ifdef SHARED
.text
ENTRY(__strncasecmp)
.type __strncasecmp, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strncasecmp_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strncasecmp_ia32)
+ HAS_SSSE3
jz 2f
- leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3)
+ HAS_SSE4_2
jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ HAS_SLOW_SSE4_2
jnz 2f
- leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(__strncasecmp)
-#else
- .text
-ENTRY(__strncasecmp)
- .type __strncasecmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __strncasecmp_ia32, %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal __strncasecmp_ssse3, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
- jz 2f
- testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
- jnz 2f
- leal __strncasecmp_sse4_2, %eax
+ LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2)
2: ret
END(__strncasecmp)
-#endif
weak_alias (__strncasecmp, strncasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
index 56a5136..166c81e 100644
--- a/sysdeps/i386/i686/multiarch/strnlen.S
+++ b/sysdeps/i386/i686/multiarch/strnlen.S
@@ -25,21 +25,12 @@
.text
ENTRY(__strnlen)
.type __strnlen, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strnlen_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strnlen_ia32)
+ HAS_SSE2
jz 2f
- leal __strnlen_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__strnlen_sse2)
+2: ret
END(__strnlen)
weak_alias(__strnlen, strnlen)
diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S
index 91074b4..984694b 100644
--- a/sysdeps/i386/i686/multiarch/strrchr.S
+++ b/sysdeps/i386/i686/multiarch/strrchr.S
@@ -25,24 +25,15 @@
.text
ENTRY(strrchr)
.type strrchr, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strrchr_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strrchr_ia32)
+ HAS_SSE2
jz 2f
- leal __strrchr_sse2_bsf@GOTOFF(%ebx), %eax
- testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf)
+ HAS_SLOW_BSF
jz 2f
- leal __strrchr_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__strrchr_sse2)
+2: ret
END(strrchr)
# undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S
index 9d353a2..b9e2a74 100644
--- a/sysdeps/i386/i686/multiarch/strspn.S
+++ b/sysdeps/i386/i686/multiarch/strspn.S
@@ -27,40 +27,16 @@
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
-# ifdef SHARED
.text
ENTRY(strspn)
.type strspn, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __strspn_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strspn_ia32)
+ HAS_SSE4_2
jz 2f
- leal __strspn_sse42@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
-END(strspn)
-# else
- .text
-ENTRY(strspn)
- .type strspn, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal __strspn_ia32, %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
- jz 2f
- leal __strspn_sse42, %eax
+ LOAD_FUNC_GOT_EAX (__strspn_sse42)
2: ret
END(strspn)
-# endif
# undef ENTRY
# define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S
index 603d7d7..0c4ad2f 100644
--- a/sysdeps/i386/i686/multiarch/wcschr.S
+++ b/sysdeps/i386/i686/multiarch/wcschr.S
@@ -25,21 +25,12 @@
.text
ENTRY(__wcschr)
.type wcschr, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wcschr_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcschr_ia32)
+ HAS_SSE2
jz 2f
- leal __wcschr_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wcschr_sse2)
+2: ret
END(__wcschr)
weak_alias (__wcschr, wcschr)
#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S
index 92c2c84..445e034 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp.S
@@ -28,21 +28,12 @@
.text
ENTRY(__wcscmp)
.type __wcscmp, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wcscmp_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcscmp_ia32)
+ HAS_SSE2
jz 2f
- leal __wcscmp_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wcscmp_sse2)
+2: ret
END(__wcscmp)
weak_alias (__wcscmp, wcscmp)
#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S
index f7253c7..5f9f9f4 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy.S
@@ -26,20 +26,11 @@
.text
ENTRY(wcscpy)
.type wcscpy, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wcscpy_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcscpy_ia32)
+ HAS_SSSE3
jz 2f
- leal __wcscpy_ssse3@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wcscpy_ssse3)
+2: ret
END(wcscpy)
#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
index 3926a50..aabacda 100644
--- a/sysdeps/i386/i686/multiarch/wcslen.S
+++ b/sysdeps/i386/i686/multiarch/wcslen.S
@@ -25,21 +25,12 @@
.text
ENTRY(__wcslen)
.type __wcslen, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wcslen_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcslen_ia32)
+ HAS_SSE2
jz 2f
- leal __wcslen_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wcslen_sse2)
+2: ret
END(__wcslen)
weak_alias(__wcslen, wcslen)
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S
index 5c96129..24f8313 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr.S
@@ -25,20 +25,11 @@
.text
ENTRY(wcsrchr)
.type wcsrchr, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wcsrchr_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcsrchr_ia32)
+ HAS_SSE2
jz 2f
- leal __wcsrchr_sse2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4);
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wcsrchr_sse2)
+2: ret
END(wcsrchr)
#endif
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
index 6ca6053..dcf0fc0 100644
--- a/sysdeps/i386/i686/multiarch/wmemcmp.S
+++ b/sysdeps/i386/i686/multiarch/wmemcmp.S
@@ -27,23 +27,14 @@
.text
ENTRY(wmemcmp)
.type wmemcmp, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal __wmemcmp_ia32@GOTOFF(%ebx), %eax
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wmemcmp_ia32)
+ HAS_SSSE3
jz 2f
- leal __wmemcmp_ssse3@GOTOFF(%ebx), %eax
- testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3)
+ HAS_SSE4_2
jz 2f
- leal __wmemcmp_sse4_2@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
+ LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2)
+2: ret
END(wmemcmp)
#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=dcdc26382ba3019931774043cff7335a612e0642
commit dcdc26382ba3019931774043cff7335a612e0642
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Jul 31 13:41:04 2015 -0700
Use LOAD_RTLD_GLOBAL_RO_RDX and HAS_XXX in x86_64/multiarch
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
index f8b4636..8f0e274 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp.S
@@ -26,16 +26,13 @@
.text
ENTRY(memcmp)
.type memcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_SSSE3
jnz 2f
leaq __memcmp_sse2(%rip), %rax
ret
-2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+2: HAS_SSE4_1
jz 3f
leaq __memcmp_sse4_1(%rip), %rax
ret
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 4e18cd3..780c1ad 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -29,19 +29,17 @@
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memcpy_avx_unaligned(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __memcpy_avx_unaligned(%rip), %rax
+ HAS_AVX_FAST_UNALIGNED_LOAD
jz 1f
ret
1: leaq __memcpy_sse2(%rip), %rax
- testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+ HAS_SLOW_BSF
jnz 2f
leaq __memcpy_sse2_unaligned(%rip), %rax
ret
-2: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+2: HAS_SSSE3
jz 3f
leaq __memcpy_ssse3(%rip), %rax
3: ret
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 1e756ea..b9b157b 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -29,17 +29,15 @@
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __memcpy_chk_sse2(%rip), %rax
+ HAS_SSSE3
jz 2f
leaq __memcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_FAST_COPY_BACKWARD
jz 2f
leaq __memcpy_chk_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_AVX_FAST_UNALIGNED_LOAD
jz 2f
leaq __memcpy_chk_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 2eaacdf..f346696 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -27,17 +27,15 @@
#if defined SHARED && IS_IN (libc)
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __mempcpy_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __mempcpy_sse2(%rip), %rax
+ HAS_SSSE3
jz 2f
leaq __mempcpy_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_FAST_COPY_BACKWARD
jz 2f
leaq __mempcpy_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_AVX_FAST_UNALIGNED_LOAD
jz 2f
leaq __mempcpy_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 17b8470..a31c3b1 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -29,17 +29,15 @@
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __mempcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __mempcpy_chk_sse2(%rip), %rax
+ HAS_SSSE3
jz 2f
leaq __mempcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_FAST_COPY_BACKWARD
jz 2f
leaq __mempcpy_chk_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_AVX_FAST_UNALIGNED_LOAD
jz 2f
leaq __mempcpy_chk_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index c5f1fb3..e542548 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -26,11 +26,9 @@
# if IS_IN (libc)
ENTRY(memset)
.type memset, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memset_sse2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __memset_sse2(%rip), %rax
+ HAS_AVX2
jz 2f
leaq __memset_avx2(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 64fed31..63bcc89 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -25,11 +25,9 @@
# if defined SHARED && defined HAVE_AVX2_SUPPORT
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memset_chk_sse2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __memset_chk_sse2(%rip), %rax
+ HAS_AVX2
jz 2f
leaq __memset_chk_avx2(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
index 44993fa..986b13f 100644
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ b/sysdeps/x86_64/multiarch/strcat.S
@@ -47,14 +47,12 @@
.text
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
leaq STRCAT_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_SSSE3
jz 2f
leaq STRCAT_SSSE3(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index af55fac..373fb87 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -25,11 +25,9 @@
.text
ENTRY(strchr)
.type strchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strchr_sse2(%rip), %rax
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __strchr_sse2(%rip), %rax
+2: HAS_SLOW_BSF
jz 3f
leaq __strchr_sse2_no_bsf(%rip), %rax
3: ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f50f26c..b219319 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -84,24 +84,20 @@
.text
ENTRY(STRCMP)
.type STRCMP, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
+ LOAD_RTLD_GLOBAL_RO_RDX
#ifdef USE_AS_STRCMP
leaq __strcmp_sse2_unaligned(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ HAS_FAST_UNALIGNED_LOAD
jnz 3f
#else
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_SLOW_SSE4_2
jnz 2f
leaq STRCMP_SSE42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_SSE4_2
jnz 3f
#endif
2: leaq STRCMP_SSSE3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_SSSE3
jnz 3f
leaq STRCMP_SSE2(%rip), %rax
3: ret
@@ -110,23 +106,19 @@ END(STRCMP)
# ifdef USE_AS_STRCASECMP_L
ENTRY(__strcasecmp)
.type __strcasecmp, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
+ LOAD_RTLD_GLOBAL_RO_RDX
# ifdef HAVE_AVX_SUPPORT
leaq __strcasecmp_avx(%rip), %rax
- testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+ HAS_AVX
jnz 3f
# endif
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_SLOW_SSE4_2
jnz 2f
leaq __strcasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_SSE4_2
jnz 3f
2: leaq __strcasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_SSSE3
jnz 3f
leaq __strcasecmp_sse2(%rip), %rax
3: ret
@@ -136,23 +128,19 @@ weak_alias (__strcasecmp, strcasecmp)
# ifdef USE_AS_STRNCASECMP_L
ENTRY(__strncasecmp)
.type __strncasecmp, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
+ LOAD_RTLD_GLOBAL_RO_RDX
# ifdef HAVE_AVX_SUPPORT
leaq __strncasecmp_avx(%rip), %rax
- testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+ HAS_AVX
jnz 3f
# endif
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_SLOW_SSE4_2
jnz 2f
leaq __strncasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_SSE4_2
jnz 3f
2: leaq __strncasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_SSSE3
jnz 3f
leaq __strncasecmp_sse2(%rip), %rax
3: ret
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 9464ee8..5c040ae 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -61,14 +61,12 @@
.text
ENTRY(STRCPY)
.type STRCPY, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCPY_SSE2_UNALIGNED(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq STRCPY_SSE2_UNALIGNED(%rip), %rax
+ HAS_FAST_UNALIGNED_LOAD
jnz 2f
leaq STRCPY_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_SSSE3
jz 2f
leaq STRCPY_SSSE3(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
index 95e882c..340cab6 100644
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ b/sysdeps/x86_64/multiarch/strcspn.S
@@ -45,11 +45,9 @@
.text
ENTRY(STRCSPN)
.type STRCSPN, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCSPN_SSE2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq STRCSPN_SSE2(%rip), %rax
+ HAS_SSE4_2
jz 2f
leaq STRCSPN_SSE42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S
index b734c17..c0afcf3 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/strspn.S
@@ -30,11 +30,9 @@
.text
ENTRY(strspn)
.type strspn, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strspn_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq __strspn_sse2(%rip), %rax
+ HAS_SSE4_2
jz 2f
leaq __strspn_sse42(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/wcscpy.S b/sysdeps/x86_64/multiarch/wcscpy.S
index ff2f5a7..40c1fc4 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.S
+++ b/sysdeps/x86_64/multiarch/wcscpy.S
@@ -27,11 +27,8 @@
.text
ENTRY(wcscpy)
.type wcscpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_SSSE3
jnz 2f
leaq __wcscpy_sse2(%rip), %rax
ret
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.S b/sysdeps/x86_64/multiarch/wmemcmp.S
index 109e245..c6b73aa 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.S
+++ b/sysdeps/x86_64/multiarch/wmemcmp.S
@@ -26,16 +26,13 @@
.text
ENTRY(wmemcmp)
.type wmemcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_SSSE3
jnz 2f
leaq __wmemcmp_sse2(%rip), %rax
ret
-2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+2: HAS_SSE4_1
jz 3f
leaq __wmemcmp_sse4_1(%rip), %rax
ret
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ff889a9c8c894dd91459b98cabbc483ee831fc7a
commit ff889a9c8c894dd91459b98cabbc483ee831fc7a
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Jul 31 07:30:04 2015 -0700
Add _dl_x86_cpu_features to rtld_global for x86
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 04f9247..4a28eb3 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -25,6 +25,7 @@
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <cpu-features.c>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
@@ -235,6 +236,8 @@ dl_platform_init (void)
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
+
+ init_cpu_features (&GLRO(dl_x86_cpu_features));
}
static inline Elf32_Addr
diff --git a/sysdeps/i386/dl-procinfo.c b/sysdeps/i386/dl-procinfo.c
index b673b3c..e95f335 100644
--- a/sysdeps/i386/dl-procinfo.c
+++ b/sysdeps/i386/dl-procinfo.c
@@ -43,6 +43,22 @@
# define PROCINFO_CLASS
#endif
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_x86_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
#if !defined PROCINFO_DECL && defined SHARED
._dl_x86_cap_flags
#else
diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c
index 0f869df..0b50c6d 100644
--- a/sysdeps/i386/i686/cacheinfo.c
+++ b/sysdeps/i386/i686/cacheinfo.c
@@ -1,4 +1,3 @@
#define DISABLE_PREFETCHW
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
#include <sysdeps/x86_64/cacheinfo.c>
diff --git a/sysdeps/i386/ldsodefs.h b/sysdeps/i386/ldsodefs.h
index d80cf01..dae2d04 100644
--- a/sysdeps/i386/ldsodefs.h
+++ b/sysdeps/i386/ldsodefs.h
@@ -20,6 +20,7 @@
#define _I386_LDSODEFS_H 1
#include <elf.h>
+#include <cpu-features.h>
struct La_i86_regs;
struct La_i86_retval;
diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
index 8ac351e..a3c0c19 100644
--- a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
+++ b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
@@ -1,5 +1,5 @@
#if IS_IN (ldconfig)
# include <sysdeps/i386/dl-procinfo.c>
#else
-# include <sysdeps/generic/dl-procinfo.c>
+# include <sysdeps/x86_64/dl-procinfo.c>
#endif
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 19f5eca..61dfff3 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -8,3 +8,7 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so
$(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \
$(evaluate-test)
endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym
+endif
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
new file mode 100644
index 0000000..a9d53d1
--- /dev/null
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -0,0 +1,7 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
+
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86/cpu-features.c
similarity index 65%
copy from sysdeps/x86_64/multiarch/init-arch.c
copy to sysdeps/x86/cpu-features.c
index aaad5fa..cbdf4af 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1,7 +1,5 @@
-/* Initialize CPU feature data.
+/* Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Copyright (C) 2008-2015 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -17,48 +15,40 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <atomic.h>
#include <cpuid.h>
-#include "init-arch.h"
+#include <cpu-features.h>
-
-struct cpu_features __cpu_features attribute_hidden;
-
-
-static void
-get_common_indeces (unsigned int *family, unsigned int *model)
+static inline void
+get_common_indeces (struct cpu_features *cpu_features,
+ unsigned int *family, unsigned int *model)
{
- __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
-
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+ unsigned int eax;
+ __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+ /* Store EAX in the same object that received EBX, ECX and EDX; the caller reads it back through CPU_FEATURES. */
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
*family = (eax >> 8) & 0x0f;
*model = (eax >> 4) & 0x0f;
}
-
-void
-__init_cpu_features (void)
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
{
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
+ unsigned int ebx, ecx, edx;
unsigned int family = 0;
unsigned int model = 0;
enum cpu_features_kind kind;
- __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
+ __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
/* This spells out "GenuineIntel". */
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
{
kind = arch_kind_intel;
- get_common_indeces (&family, &model);
+ get_common_indeces (cpu_features, &family, &model);
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+ unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax;
unsigned int extended_family = (eax >> 20) & 0xff;
unsigned int extended_model = (eax >> 12) & 0xf0;
if (family == 0x0f)
@@ -68,14 +58,14 @@ __init_cpu_features (void)
}
else if (family == 0x06)
{
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+ ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
model += extended_model;
switch (model)
{
case 0x1c:
case 0x26:
/* BSF is slow on Atom. */
- __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
+ cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
break;
case 0x37:
@@ -91,7 +81,7 @@ __init_cpu_features (void)
#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
#endif
- __cpu_features.feature[index_Fast_Unaligned_Load]
+ cpu_features->feature[index_Fast_Unaligned_Load]
|= (bit_Fast_Unaligned_Load
| bit_Prefer_PMINUB_for_stringop
| bit_Slow_SSE4_2);
@@ -121,7 +111,7 @@ __init_cpu_features (void)
#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
#endif
- __cpu_features.feature[index_Fast_Rep_String]
+ cpu_features->feature[index_Fast_Rep_String]
|= (bit_Fast_Rep_String
| bit_Fast_Copy_Backward
| bit_Fast_Unaligned_Load
@@ -135,28 +125,28 @@ __init_cpu_features (void)
{
kind = arch_kind_amd;
- get_common_indeces (&family, &model);
+ get_common_indeces (cpu_features, &family, &model);
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+ ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
unsigned int eax;
__cpuid (0x80000000, eax, ebx, ecx, edx);
if (eax >= 0x80000001)
__cpuid (0x80000001,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
}
else
kind = arch_kind_other;
- if (__cpu_features.max_cpuid >= 7)
+ if (cpu_features->max_cpuid >= 7)
__cpuid_count (7, 0,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
/* Can we call xgetbv? */
if (CPUID_OSXSAVE)
@@ -170,14 +160,14 @@ __init_cpu_features (void)
{
/* Determine if AVX is usable. */
if (CPUID_AVX)
- __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+ cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
#endif
/* Determine if AVX2 is usable. Unaligned load with 256-bit
AVX registers are faster on processors with AVX2. */
if (CPUID_AVX2)
- __cpu_features.feature[index_AVX2_Usable]
+ cpu_features->feature[index_AVX2_Usable]
|= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
ZMM16-ZMM31 state are enabled. */
@@ -188,36 +178,24 @@ __init_cpu_features (void)
/* Determine if AVX512F is usable. */
if (CPUID_AVX512F)
{
- __cpu_features.feature[index_AVX512F_Usable]
+ cpu_features->feature[index_AVX512F_Usable]
|= bit_AVX512F_Usable;
/* Determine if AVX512DQ is usable. */
if (CPUID_AVX512DQ)
- __cpu_features.feature[index_AVX512DQ_Usable]
+ cpu_features->feature[index_AVX512DQ_Usable]
|= bit_AVX512DQ_Usable;
}
}
/* Determine if FMA is usable. */
if (CPUID_FMA)
- __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
+ cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
/* Determine if FMA4 is usable. */
if (CPUID_FMA4)
- __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
+ cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
}
}
- __cpu_features.family = family;
- __cpu_features.model = model;
- atomic_write_barrier ();
- __cpu_features.kind = kind;
-}
-
-#undef __get_cpu_features
-
-const struct cpu_features *
-__get_cpu_features (void)
-{
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-
- return &__cpu_features;
+ cpu_features->family = family;
+ cpu_features->model = model;
+ cpu_features->kind = kind;
}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86/cpu-features.h
similarity index 66%
copy from sysdeps/x86_64/multiarch/init-arch.h
copy to sysdeps/x86/cpu-features.h
index cfc6e70..c8ff30e 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86/cpu-features.h
@@ -15,6 +15,9 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef cpu_features_h
+#define cpu_features_h
+
#define bit_Fast_Rep_String (1 << 0)
#define bit_Fast_Copy_Backward (1 << 1)
#define bit_Slow_BSF (1 << 2)
@@ -56,14 +59,15 @@
#define bit_ZMM16_31_state (1 << 7)
/* The integer bit array index for the first set of internal feature bits. */
-# define FEATURE_INDEX_1 0
+#define FEATURE_INDEX_1 0
/* The current maximum size of the feature integer bit array. */
-# define FEATURE_INDEX_MAX 1
+#define FEATURE_INDEX_MAX 1
#ifdef __ASSEMBLER__
# include <ifunc-defines.h>
+# include <rtld-global-offsets.h>
# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
@@ -86,9 +90,62 @@
# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE
-#else /* __ASSEMBLER__ */
+/* HAS_* evaluates to true if we may use the feature at runtime. */
+# ifdef __x86_64__
+# ifdef SHARED
+# if IS_IN (rtld)
+# define LOAD_RTLD_GLOBAL_RO_RDX
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip)
+# else
+# define LOAD_RTLD_GLOBAL_RO_RDX \
+ mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), \
+ RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx)
+# endif
+# else /* SHARED */
+# define LOAD_RTLD_GLOBAL_RO_RDX
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip)
+# endif /* !SHARED */
+# else /* __x86_64__ */
+# ifdef SHARED
+# define LOAD_FUNC_GOT_EAX(func) \
+ leal func@GOTOFF(%edx), %eax
+# if IS_IN (rtld)
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
+ LOAD_PIC_REG(dx)
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx)
+# else
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
+ LOAD_PIC_REG(dx); \
+ mov _rtld_global_ro@GOT(%edx), %ecx
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), \
+ RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx)
+# endif
+# else /* SHARED */
+# define LOAD_FUNC_GOT_EAX(func) \
+ leal func, %eax
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO
+# define HAS_FEATURE(offset, name) \
+ testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)
+# endif /* !SHARED */
+# endif /* !__x86_64__ */
-# include <sys/param.h>
+# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name)
+# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name)
+
+# define HAS_SSE2 HAS_CPU_FEATURE (SSE2)
+# define HAS_POPCOUNT HAS_CPU_FEATURE (POPCOUNT)
+# define HAS_SSSE3 HAS_CPU_FEATURE (SSSE3)
+# define HAS_SSE4_1 HAS_CPU_FEATURE (SSE4_1)
+# define HAS_SSE4_2 HAS_CPU_FEATURE (SSE4_2)
+# define HAS_RTM HAS_CPU_FEATURE (RTM)
+
+#else /* __ASSEMBLER__ */
enum
{
@@ -99,7 +156,7 @@ enum
COMMON_CPUID_INDEX_MAX
};
-extern struct cpu_features
+struct cpu_features
{
enum cpu_features_kind
{
@@ -119,22 +176,18 @@ extern struct cpu_features
unsigned int family;
unsigned int model;
unsigned int feature[FEATURE_INDEX_MAX];
-} __cpu_features attribute_hidden;
+};
+/* Unused for x86. */
+# define INIT_ARCH()
-extern void __init_cpu_features (void) attribute_hidden;
-# define INIT_ARCH() \
- do \
- if (__cpu_features.kind == arch_kind_unknown) \
- __init_cpu_features (); \
- while (0)
-
-/* Used from outside libc.so to get access to the CPU features structure. */
+/* Used from outside of glibc to get access to the CPU features
+ structure. */
extern const struct cpu_features *__get_cpu_features (void)
__attribute__ ((const));
-# if IS_IN (libc)
-# define __get_cpu_features() (&__cpu_features)
+# if defined (_LIBC) && !IS_IN (nonlib)
+# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
# endif
# define HAS_CPU_FEATURE(idx, reg, bit) \
@@ -142,12 +195,8 @@ extern const struct cpu_features *__get_cpu_features (void)
/* Following are the feature tests used throughout libc. */
-/* CPUID_* evaluates to true if the feature flag is enabled.
- We always use &__cpu_features because the HAS_CPUID_* macros
- are called only within __init_cpu_features, where we can't
- call __get_cpu_features without infinite recursion. */
# define HAS_CPUID_FLAG(idx, reg, bit) \
- (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
+ ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)
# define CPUID_OSXSAVE \
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
@@ -191,16 +240,19 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
-# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable)
-# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable)
-# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable)
-# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable)
-# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
-# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-
#endif /* __ASSEMBLER__ */
+
+#define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
+#define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
+#define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
+#define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+#define HAS_SLOW_SSE4_2 HAS_ARCH_FEATURE (Slow_SSE4_2)
+#define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
+#define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable)
+#define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable)
+#define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable)
+#define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable)
+#define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
+#define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+
+#endif /* cpu_features_h */
diff --git a/sysdeps/i386/ldsodefs.h b/sysdeps/x86/libc-start.c
similarity index 50%
copy from sysdeps/i386/ldsodefs.h
copy to sysdeps/x86/libc-start.c
index d80cf01..9f0c045 100644
--- a/sysdeps/i386/ldsodefs.h
+++ b/sysdeps/x86/libc-start.c
@@ -1,5 +1,4 @@
-/* Run-time dynamic linker data structures for loaded ELF shared objects.
- Copyright (C) 1995-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,25 +15,27 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _I386_LDSODEFS_H
-#define _I386_LDSODEFS_H 1
-
-#include <elf.h>
-
-struct La_i86_regs;
-struct La_i86_retval;
-
-#define ARCH_PLTENTER_MEMBERS \
- Elf32_Addr (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \
- uintptr_t *, struct La_i86_regs *, \
- unsigned int *, const char *name, \
- long int *framesizep)
-
-#define ARCH_PLTEXIT_MEMBERS \
- unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \
- uintptr_t *, const struct La_i86_regs *, \
- struct La_i86_retval *, const char *)
-
-#include_next <ldsodefs.h>
-
+#ifdef SHARED
+# include <csu/libc-start.c>
+# else
+/* The main work is done in the generic function. */
+# define LIBC_START_DISABLE_INLINE
+# define LIBC_START_MAIN generic_start_main
+# include <csu/libc-start.c>
+# include <cpu-features.h>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_x86_cpu_features;
+
+int
+__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
+ int argc, char **argv,
+ __typeof (main) init,
+ void (*fini) (void),
+ void (*rtld_fini) (void), void *stack_end)
+{
+ init_cpu_features (&_dl_x86_cpu_features);
+ return generic_start_main (main, argc, argv, init, fini, rtld_fini,
+ stack_end);
+}
#endif
diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym
new file mode 100644
index 0000000..a9d53d1
--- /dev/null
+++ b/sysdeps/x86/rtld-global-offsets.sym
@@ -0,0 +1,7 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
+
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index b99fb9a..0ff5309 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -21,40 +21,11 @@
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
+#include "multiarch/init-arch.h"
-#ifndef __cpuid_count
-/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc
- 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */
-# if defined(__i386__) && defined(__PIC__)
-/* %ebx may be the PIC register. */
-# define __cpuid_count(level, count, a, b, c, d) \
- __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
- "cpuid\n\t" \
- "xchg{l}\t{%%}ebx, %1\n\t" \
- : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
- : "0" (level), "2" (count))
-# else
-# define __cpuid_count(level, count, a, b, c, d) \
- __asm__ ("cpuid\n\t" \
- : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
- : "0" (level), "2" (count))
-# endif
-#endif
-
-#ifdef USE_MULTIARCH
-# include "multiarch/init-arch.h"
-
-# define is_intel __cpu_features.kind == arch_kind_intel
-# define is_amd __cpu_features.kind == arch_kind_amd
-# define max_cpuid __cpu_features.max_cpuid
-#else
- /* This spells out "GenuineIntel". */
-# define is_intel \
- ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
- /* This spells out "AuthenticAMD". */
-# define is_amd \
- ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
-#endif
+#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
+#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
+#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
static const struct intel_02_cache_info
{
@@ -235,21 +206,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
- unsigned int family;
- unsigned int model;
-#ifdef USE_MULTIARCH
- family = __cpu_features.family;
- model = __cpu_features.model;
-#else
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (1, eax, ebx, ecx, edx);
-
- family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
- model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
-#endif
+ unsigned int family = GLRO(dl_x86_cpu_features).family;
+ unsigned int model = GLRO(dl_x86_cpu_features).model;
if (family == 15 && model == 6)
{
@@ -476,18 +434,6 @@ long int
attribute_hidden
__cache_sysconf (int name)
{
-#ifdef USE_MULTIARCH
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-#else
- /* Find out what brand of processor. */
- unsigned int max_cpuid;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
if (is_intel)
return handle_intel (name, max_cpuid);
@@ -523,18 +469,6 @@ long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
int __x86_prefetchw attribute_hidden;
#endif
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-/* Instructions preferred for memory and string routines.
-
- 0: Regular instructions
- 1: MMX instructions
- 2: SSE2 instructions
- 3: SSSE3 instructions
-
- */
-int __x86_preferred_memory_instruction attribute_hidden;
-#endif
-
static void
__attribute__((constructor))
@@ -551,14 +485,6 @@ init_cacheinfo (void)
unsigned int level;
unsigned int threads = 0;
-#ifdef USE_MULTIARCH
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-#else
- int max_cpuid;
- __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
if (is_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
@@ -574,34 +500,13 @@ init_cacheinfo (void)
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
- unsigned int ebx_1;
-
-#ifdef USE_MULTIARCH
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-#else
- __cpuid (1, eax, ebx_1, ecx, edx);
-#endif
-
- unsigned int family = (eax >> 8) & 0x0f;
- unsigned int model = (eax >> 4) & 0x0f;
- unsigned int extended_model = (eax >> 12) & 0xf0;
-
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
- /* Intel prefers SSSE3 instructions for memory/string routines
- if they are available. */
- if ((ecx & 0x200))
- __x86_preferred_memory_instruction = 3;
- else
- __x86_preferred_memory_instruction = 2;
-#endif
-
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
{
+ unsigned int family = GLRO(dl_x86_cpu_features).family;
+ unsigned int model = GLRO(dl_x86_cpu_features).model;
+
int i = 0;
/* Query until desired cache level is enumerated. */
@@ -653,7 +558,6 @@ init_cacheinfo (void)
threads += 1;
if (threads > 2 && level == 2 && family == 6)
{
- model += extended_model;
switch (model)
{
case 0x57:
@@ -676,7 +580,9 @@ init_cacheinfo (void)
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
- threads = (ebx_1 >> 16) & 0xff;
+ threads
+ = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+ >> 16) & 0xff);
}
/* Cap usage of highest cache level to the number of supported
@@ -691,25 +597,6 @@ init_cacheinfo (void)
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-# ifdef USE_MULTIARCH
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-# else
- __cpuid (1, eax, ebx, ecx, edx);
-# endif
-
- /* AMD prefers SSSE3 instructions for memory/string routines
- if they are avaiable, otherwise it prefers integer
- instructions. */
- if ((ecx & 0x200))
- __x86_preferred_memory_instruction = 3;
- else
- __x86_preferred_memory_instruction = 0;
-#endif
-
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index cae6db3..d22359d 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -26,6 +26,7 @@
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <cpu-features.c>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
@@ -205,6 +206,8 @@ dl_platform_init (void)
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
+
+ init_cpu_features (&GLRO(dl_x86_cpu_features));
}
static inline ElfW(Addr)
diff --git a/sysdeps/i386/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
similarity index 61%
copy from sysdeps/i386/dl-procinfo.c
copy to sysdeps/x86_64/dl-procinfo.c
index b673b3c..851681a 100644
--- a/sysdeps/i386/dl-procinfo.c
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -1,7 +1,6 @@
-/* Data for i386 version of processor capability information.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+/* Data for x86-64 version of processor capability information.
+ Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -17,10 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* This information must be kept in sync with the _DL_HWCAP_COUNT and
- _DL_PLATFORM_COUNT definitions in procinfo.h.
-
- If anything should be added here check whether the size of each string
+/* If anything should be added here check whether the size of each string
is still ok with the given array size.
All the #ifdefs in the definitions are quite irritating but
@@ -44,33 +40,12 @@
#endif
#if !defined PROCINFO_DECL && defined SHARED
- ._dl_x86_cap_flags
-#else
-PROCINFO_CLASS const char _dl_x86_cap_flags[32][8]
-#endif
-#ifndef PROCINFO_DECL
-= {
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", "20", "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe"
- }
-#endif
-#if !defined SHARED || defined PROCINFO_DECL
-;
-#else
-,
-#endif
-
-#if !defined PROCINFO_DECL && defined SHARED
- ._dl_x86_platforms
+ ._dl_x86_cpu_features
#else
-PROCINFO_CLASS const char _dl_x86_platforms[4][5]
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
#endif
#ifndef PROCINFO_DECL
-= {
- "i386", "i486", "i586", "i686"
- }
+= { }
#endif
#if !defined SHARED || defined PROCINFO_DECL
;
diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h
index 84d36e8..e3f2da2 100644
--- a/sysdeps/x86_64/ldsodefs.h
+++ b/sysdeps/x86_64/ldsodefs.h
@@ -20,6 +20,7 @@
#define _X86_64_LDSODEFS_H 1
#include <elf.h>
+#include <cpu-features.h>
struct La_x86_64_regs;
struct La_x86_64_retval;
diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c
deleted file mode 100644
index f87b8dc..0000000
--- a/sysdeps/x86_64/multiarch/cacheinfo.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
-#include "../cacheinfo.c"
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index a410d88..7ac7acf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -1,10 +1,9 @@
-#include "init-arch.h"
+#include "cpu-features.h"
#include <stddef.h>
--
CPU_FEATURES_SIZE sizeof (struct cpu_features)
-KIND_OFFSET offsetof (struct cpu_features, kind)
CPUID_OFFSET offsetof (struct cpu_features, cpuid)
CPUID_SIZE sizeof (struct cpuid_registers)
CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index aaad5fa..01a379c 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -17,207 +17,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <atomic.h>
-#include <cpuid.h>
-#include "init-arch.h"
-
-struct cpu_features __cpu_features attribute_hidden;
-
-
-static void
-get_common_indeces (unsigned int *family, unsigned int *model)
-{
- __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
-
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- *family = (eax >> 8) & 0x0f;
- *model = (eax >> 4) & 0x0f;
-}
-
-
-void
-__init_cpu_features (void)
-{
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- unsigned int family = 0;
- unsigned int model = 0;
- enum cpu_features_kind kind;
-
- __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
-
- /* This spells out "GenuineIntel". */
- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
- {
- kind = arch_kind_intel;
-
- get_common_indeces (&family, &model);
-
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- unsigned int extended_family = (eax >> 20) & 0xff;
- unsigned int extended_model = (eax >> 12) & 0xf0;
- if (family == 0x0f)
- {
- family += extended_family;
- model += extended_model;
- }
- else if (family == 0x06)
- {
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- model += extended_model;
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* BSF is slow on Atom. */
- __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
- break;
-
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Unaligned load versions are faster than SSSE3
- on Silvermont. */
-#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
-# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
-#endif
-#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
-# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
-#endif
- __cpu_features.feature[index_Fast_Unaligned_Load]
- |= (bit_Fast_Unaligned_Load
- | bit_Prefer_PMINUB_for_stringop
- | bit_Slow_SSE4_2);
- break;
-
- default:
- /* Unknown family 0x06 processors. Assuming this is one
- of Core i3/i5/i7 processors if AVX is available. */
- if ((ecx & bit_AVX) == 0)
- break;
-
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x25:
- case 0x2c:
- case 0x2e:
- case 0x2f:
- /* Rep string instructions, copy backward, unaligned loads
- and pminub are fast on Intel Core i3, i5 and i7. */
-#if index_Fast_Rep_String != index_Fast_Copy_Backward
-# error index_Fast_Rep_String != index_Fast_Copy_Backward
-#endif
-#if index_Fast_Rep_String != index_Fast_Unaligned_Load
-# error index_Fast_Rep_String != index_Fast_Unaligned_Load
-#endif
-#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
-# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
-#endif
- __cpu_features.feature[index_Fast_Rep_String]
- |= (bit_Fast_Rep_String
- | bit_Fast_Copy_Backward
- | bit_Fast_Unaligned_Load
- | bit_Prefer_PMINUB_for_stringop);
- break;
- }
- }
- }
- /* This spells out "AuthenticAMD". */
- else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
- {
- kind = arch_kind_amd;
-
- get_common_indeces (&family, &model);
-
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
-
- unsigned int eax;
- __cpuid (0x80000000, eax, ebx, ecx, edx);
- if (eax >= 0x80000001)
- __cpuid (0x80000001,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
- }
- else
- kind = arch_kind_other;
-
- if (__cpu_features.max_cpuid >= 7)
- __cpuid_count (7, 0,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
-
- /* Can we call xgetbv? */
- if (CPUID_OSXSAVE)
- {
- unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- /* Is YMM and XMM state usable? */
- if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
- (bit_YMM_state | bit_XMM_state))
- {
- /* Determine if AVX is usable. */
- if (CPUID_AVX)
- __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
-#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
-# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
-#endif
- /* Determine if AVX2 is usable. Unaligned load with 256-bit
- AVX registers are faster on processors with AVX2. */
- if (CPUID_AVX2)
- __cpu_features.feature[index_AVX2_Usable]
- |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
- /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- ZMM16-ZMM31 state are enabled. */
- if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
- | bit_ZMM16_31_state)) ==
- (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
- {
- /* Determine if AVX512F is usable. */
- if (CPUID_AVX512F)
- {
- __cpu_features.feature[index_AVX512F_Usable]
- |= bit_AVX512F_Usable;
- /* Determine if AVX512DQ is usable. */
- if (CPUID_AVX512DQ)
- __cpu_features.feature[index_AVX512DQ_Usable]
- |= bit_AVX512DQ_Usable;
- }
- }
- /* Determine if FMA is usable. */
- if (CPUID_FMA)
- __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
- /* Determine if FMA4 is usable. */
- if (CPUID_FMA4)
- __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
- }
- }
-
- __cpu_features.family = family;
- __cpu_features.model = model;
- atomic_write_barrier ();
- __cpu_features.kind = kind;
-}
+#include <ldsodefs.h>
#undef __get_cpu_features
const struct cpu_features *
__get_cpu_features (void)
{
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-
- return &__cpu_features;
+ return &GLRO(dl_x86_cpu_features);
}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index cfc6e70..2b9988e 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -15,192 +15,8 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#define bit_Fast_Rep_String (1 << 0)
-#define bit_Fast_Copy_Backward (1 << 1)
-#define bit_Slow_BSF (1 << 2)
-#define bit_Fast_Unaligned_Load (1 << 4)
-#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_AVX_Usable (1 << 6)
-#define bit_FMA_Usable (1 << 7)
-#define bit_FMA4_Usable (1 << 8)
-#define bit_Slow_SSE4_2 (1 << 9)
-#define bit_AVX2_Usable (1 << 10)
-#define bit_AVX_Fast_Unaligned_Load (1 << 11)
-#define bit_AVX512F_Usable (1 << 12)
-#define bit_AVX512DQ_Usable (1 << 13)
-
-/* CPUID Feature flags. */
-
-/* COMMON_CPUID_INDEX_1. */
-#define bit_SSE2 (1 << 26)
-#define bit_SSSE3 (1 << 9)
-#define bit_SSE4_1 (1 << 19)
-#define bit_SSE4_2 (1 << 20)
-#define bit_OSXSAVE (1 << 27)
-#define bit_AVX (1 << 28)
-#define bit_POPCOUNT (1 << 23)
-#define bit_FMA (1 << 12)
-#define bit_FMA4 (1 << 16)
-
-/* COMMON_CPUID_INDEX_7. */
-#define bit_RTM (1 << 11)
-#define bit_AVX2 (1 << 5)
-#define bit_AVX512F (1 << 16)
-#define bit_AVX512DQ (1 << 17)
-
-/* XCR0 Feature flags. */
-#define bit_XMM_state (1 << 1)
-#define bit_YMM_state (2 << 1)
-#define bit_Opmask_state (1 << 5)
-#define bit_ZMM0_15_state (1 << 6)
-#define bit_ZMM16_31_state (1 << 7)
-
-/* The integer bit array index for the first set of internal feature bits. */
-# define FEATURE_INDEX_1 0
-
-/* The current maximum size of the feature integer bit array. */
-# define FEATURE_INDEX_MAX 1
-
-#ifdef __ASSEMBLER__
-
-# include <ifunc-defines.h>
-
-# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
-# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
-
-# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE
-
-#else /* __ASSEMBLER__ */
-
-# include <sys/param.h>
-
-enum
- {
- COMMON_CPUID_INDEX_1 = 0,
- COMMON_CPUID_INDEX_7,
- COMMON_CPUID_INDEX_80000001, /* for AMD */
- /* Keep the following line at the end. */
- COMMON_CPUID_INDEX_MAX
- };
-
-extern struct cpu_features
-{
- enum cpu_features_kind
- {
- arch_kind_unknown = 0,
- arch_kind_intel,
- arch_kind_amd,
- arch_kind_other
- } kind;
- int max_cpuid;
- struct cpuid_registers
- {
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- } cpuid[COMMON_CPUID_INDEX_MAX];
- unsigned int family;
- unsigned int model;
- unsigned int feature[FEATURE_INDEX_MAX];
-} __cpu_features attribute_hidden;
-
-
-extern void __init_cpu_features (void) attribute_hidden;
-# define INIT_ARCH() \
- do \
- if (__cpu_features.kind == arch_kind_unknown) \
- __init_cpu_features (); \
- while (0)
-
-/* Used from outside libc.so to get access to the CPU features structure. */
-extern const struct cpu_features *__get_cpu_features (void)
- __attribute__ ((const));
-
-# if IS_IN (libc)
-# define __get_cpu_features() (&__cpu_features)
-# endif
-
-# define HAS_CPU_FEATURE(idx, reg, bit) \
- ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)
-
-/* Following are the feature tests used throughout libc. */
-
-/* CPUID_* evaluates to true if the feature flag is enabled.
- We always use &__cpu_features because the HAS_CPUID_* macros
- are called only within __init_cpu_features, where we can't
- call __get_cpu_features without infinite recursion. */
-# define HAS_CPUID_FLAG(idx, reg, bit) \
- (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
-
-# define CPUID_OSXSAVE \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
-# define CPUID_AVX \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define CPUID_FMA \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define CPUID_FMA4 \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
-# define CPUID_RTM \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-# define CPUID_AVX2 \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
-# define CPUID_AVX512F \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F)
-# define CPUID_AVX512DQ \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ)
-
-/* HAS_* evaluates to true if we may use the feature at runtime. */
-# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
-# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
-# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
-# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
-# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
-# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-
-# define index_Fast_Rep_String FEATURE_INDEX_1
-# define index_Fast_Copy_Backward FEATURE_INDEX_1
-# define index_Slow_BSF FEATURE_INDEX_1
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
-# define index_AVX_Usable FEATURE_INDEX_1
-# define index_FMA_Usable FEATURE_INDEX_1
-# define index_FMA4_Usable FEATURE_INDEX_1
-# define index_Slow_SSE4_2 FEATURE_INDEX_1
-# define index_AVX2_Usable FEATURE_INDEX_1
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_AVX512F_Usable FEATURE_INDEX_1
-# define index_AVX512DQ_Usable FEATURE_INDEX_1
-
-# define HAS_ARCH_FEATURE(name) \
- ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-
-# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
-# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable)
-# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable)
-# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable)
-# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable)
-# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
-# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-
-#endif /* __ASSEMBLER__ */
+#ifdef __ASSEMBLER__
+# include <cpu-features.h>
+#else
+# include <ldsodefs.h>
+#endif
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources