This is the mail archive of the
glibc-bugs@sourceware.org
mailing list for the glibc project.
[Bug string/20195] New: FMA4 detection requires CPUID execution with register eax=0x80000001
- From: "amit.pawar at amd dot com" <sourceware-bugzilla at sourceware dot org>
- To: glibc-bugs at sourceware dot org
- Date: Fri, 03 Jun 2016 07:21:54 +0000
- Subject: [Bug string/20195] New: FMA4 detection requires CPUID execution with register eax=0x80000001
- Auto-submitted: auto-generated
https://sourceware.org/bugzilla/show_bug.cgi?id=20195
Bug ID: 20195
Summary: FMA4 detection requires CPUID execution with register
eax=0x80000001
Product: glibc
Version: 2.23
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: string
Assignee: unassigned at sourceware dot org
Reporter: amit.pawar at amd dot com
Target Milestone: ---
FMA4 detection is failing in GLIBC (from the 2.23 release onwards) due to the fix
for defect [BZ #19214].
Cause:
To detect FMA4 support, CPUID needs to be executed with the value
0x80000001 in the $eax register. FMA4 support is only found on AMD CPUs, so this
requires special handling and can't be detected through the generic logic. Earlier,
the detection logic was executed after the CPUID execution, but now the detection
logic is executed before the CPUID execution.
The GLIBC 2.23 release also fails to detect FMA4, so I am filing this defect
against version 2.23; if this is not OK, I will select version 2.24 for this
defect instead.
The code diff is given below.
---------------------- DIFF ------------------------
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c8f81ef..1787716 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -19,79 +19,23 @@
#include <cpuid.h>
#include <cpu-features.h>
-static void
+static inline void
get_common_indeces (struct cpu_features *cpu_features,
unsigned int *family, unsigned int *model,
unsigned int *extended_model)
{
- if (family)
+ unsigned int eax;
+ __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+ GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax;
+ *family = (eax >> 8) & 0x0f;
+ *model = (eax >> 4) & 0x0f;
+ *extended_model = (eax >> 12) & 0xf0;
+ if (*family == 0x0f)
{
- unsigned int eax;
- __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
- *family = (eax >> 8) & 0x0f;
- *model = (eax >> 4) & 0x0f;
- *extended_model = (eax >> 12) & 0xf0;
- if (*family == 0x0f)
- {
- *family += (eax >> 20) & 0xff;
- *model += *extended_model;
- }
- }
-
- if (cpu_features->max_cpuid >= 7)
- __cpuid_count (7, 0,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
-
- /* Can we call xgetbv? */
- if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
- {
- unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- /* Is YMM and XMM state usable? */
- if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
- (bit_YMM_state | bit_XMM_state))
- {
- /* Determine if AVX is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX))
- cpu_features->feature[index_arch_AVX_Usable]
- |= bit_arch_AVX_Usable;
- /* Determine if AVX2 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
- cpu_features->feature[index_arch_AVX2_Usable]
- |= bit_arch_AVX2_Usable;
- /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- ZMM16-ZMM31 state are enabled. */
- if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
- | bit_ZMM16_31_state)) ==
- (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
- {
- /* Determine if AVX512F is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
- {
- cpu_features->feature[index_arch_AVX512F_Usable]
- |= bit_arch_AVX512F_Usable;
- /* Determine if AVX512DQ is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
- cpu_features->feature[index_arch_AVX512DQ_Usable]
- |= bit_arch_AVX512DQ_Usable;
- }
- }
- /* Determine if FMA is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, FMA))
- cpu_features->feature[index_arch_FMA_Usable]
- |= bit_arch_FMA_Usable;
- /* Determine if FMA4 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
- cpu_features->feature[index_arch_FMA4_Usable]
- |= bit_arch_FMA4_Usable;
- }
+ *family += (eax >> 20) & 0xff;
+ *model += *extended_model;
}
}
@@ -227,19 +165,73 @@ init_cpu_features (struct cpu_features *cpu_features)
}
}
else
- {
- kind = arch_kind_other;
- get_common_indeces (cpu_features, NULL, NULL, NULL);
- }
+ kind = arch_kind_other;
/* Support i586 if CX8 is available. */
- if (CPU_FEATURES_CPU_P (cpu_features, CX8))
+ if (HAS_CPU_FEATURE (CX8))
cpu_features->feature[index_arch_I586] |= bit_arch_I586;
/* Support i686 if CMOV is available. */
- if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
+ if (HAS_CPU_FEATURE (CMOV))
cpu_features->feature[index_arch_I686] |= bit_arch_I686;
+ if (cpu_features->max_cpuid >= 7)
+ __cpuid_count (7, 0,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+
+ /* Can we call xgetbv? */
+ if (HAS_CPU_FEATURE (OSXSAVE))
+ {
+ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ /* Is YMM and XMM state usable? */
+ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ (bit_YMM_state | bit_XMM_state))
+ {
+ /* Determine if AVX is usable. */
+ if (HAS_CPU_FEATURE (AVX))
+ cpu_features->feature[index_arch_AVX_Usable]
+ |= bit_arch_AVX_Usable;
+#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
+# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
+#endif
+ /* Determine if AVX2 is usable. Unaligned load with 256-bit
+ AVX registers are faster on processors with AVX2. */
+ if (HAS_CPU_FEATURE (AVX2))
+ cpu_features->feature[index_arch_AVX2_Usable]
+ |= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load;
+ /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+ ZMM16-ZMM31 state are enabled. */
+ if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
+ | bit_ZMM16_31_state)) ==
+ (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
+ {
+ /* Determine if AVX512F is usable. */
+ if (HAS_CPU_FEATURE (AVX512F))
+ {
+ cpu_features->feature[index_arch_AVX512F_Usable]
+ |= bit_arch_AVX512F_Usable;
+ /* Determine if AVX512DQ is usable. */
+ if (HAS_CPU_FEATURE (AVX512DQ))
+ cpu_features->feature[index_arch_AVX512DQ_Usable]
+ |= bit_arch_AVX512DQ_Usable;
+ }
+ }
+ /* Determine if FMA is usable. */
+ if (HAS_CPU_FEATURE (FMA))
+ cpu_features->feature[index_arch_FMA_Usable]
+ |= bit_arch_FMA_Usable;
+ /* Determine if FMA4 is usable. */
+ if (HAS_CPU_FEATURE (FMA4))
+ cpu_features->feature[index_arch_FMA4_Usable]
+ |= bit_arch_FMA4_Usable;
+ }
+ }
+
#if !HAS_CPUID
no_cpuid:
#endif
--
You are receiving this mail because:
You are on the CC list for the bug.