This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Fix misdetected Slow_SSE4_2 cpu feature bit (bug 17501)
- From: Andreas Schwab <schwab at suse dot de>
- To: libc-alpha at sourceware dot org
- Date: Tue, 21 Oct 2014 12:41:32 +0200
- Subject: [PATCH] Fix misdetected Slow_SSE4_2 cpu feature bit (bug 17501)
- Authentication-results: sourceware.org; auth=none
Several places misdetect the Slow_SSE4_2 due to using the wrong offset
into the __cpu_features structure. This is only a performance bug,
since the actual check for SSE4_2 is correct.
Tested on x86_64-suse-linux and i686-suse-linux, on a system that has
SSE4_2 but not Slow_SSE4_2.
Andreas.
[BZ #17501]
* sysdeps/i386/i686/multiarch/strcasecmp.S (__strcasecmp): Fix
check for Slow_SSE4_2 feature bit.
* sysdeps/i386/i686/multiarch/strcmp.S (STRCMP): Likewise.
* sysdeps/i386/i686/multiarch/strncase.S (__strncasecmp): Likewise.
* sysdeps/x86_64/multiarch/strcmp.S (STRCMP, __strcascmp):
Likewise. Fix check for Fast_Unaligned_Load feature bit.
---
sysdeps/i386/i686/multiarch/strcasecmp.S | 4 ++--
sysdeps/i386/i686/multiarch/strcmp.S | 4 ++--
sysdeps/i386/i686/multiarch/strncase.S | 4 ++--
sysdeps/x86_64/multiarch/strcmp.S | 8 ++++----
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
index 4f2de4f..57ccef5 100644
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -37,7 +37,7 @@ ENTRY(__strcasecmp)
leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
jnz 2f
leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax
2: popl %ebx
@@ -58,7 +58,7 @@ ENTRY(__strcasecmp)
leal __strcasecmp_ssse3, %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
jnz 2f
leal __strcasecmp_sse4_2, %eax
2: ret
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
index 2ad6bf4..23cbd6f 100644
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -68,7 +68,7 @@ ENTRY(STRCMP)
leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
jnz 2f
leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
2: popl %ebx
@@ -89,7 +89,7 @@ ENTRY(STRCMP)
leal __STRCMP_SSSE3, %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
jnz 2f
leal __STRCMP_SSE4_2, %eax
2: ret
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
index 9b4cfa0..41644a5 100644
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ b/sysdeps/i386/i686/multiarch/strncase.S
@@ -37,7 +37,7 @@ ENTRY(__strncasecmp)
leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
jnz 2f
leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax
2: popl %ebx
@@ -58,7 +58,7 @@ ENTRY(__strncasecmp)
leal __strncasecmp_ssse3, %eax
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
jz 2f
- testl $bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+ testl $bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
jnz 2f
leal __strncasecmp_sse4_2, %eax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f3e0ca1..bd71714 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -91,10 +91,10 @@ ENTRY(STRCMP)
1:
#ifdef USE_AS_STRCMP
leaq __strcmp_sse2_unaligned(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip)
+ testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
jnz 3f
#else
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
jnz 2f
leaq STRCMP_SSE42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -120,7 +120,7 @@ ENTRY(__strcasecmp)
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
jnz 3f
# endif
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
jnz 2f
leaq __strcasecmp_sse42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -146,7 +146,7 @@ ENTRY(__strncasecmp)
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
jnz 3f
# endif
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
jnz 2f
leaq __strncasecmp_sse42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
--
2.1.2
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."