This is the mail archive of the glibc-bugs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug string/20195] New: FMA4 detection requires CPUID execution with register eax=0x80000001


https://sourceware.org/bugzilla/show_bug.cgi?id=20195

            Bug ID: 20195
           Summary: FMA4 detection requires CPUID execution with register
                    eax=0x80000001
           Product: glibc
           Version: 2.23
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: string
          Assignee: unassigned at sourceware dot org
          Reporter: amit.pawar at amd dot com
  Target Milestone: ---

FMA4 detection is failing in GLIBC (from 2.23 release onwards) due to the fix
for defect [BZ #19214].

Cause:
To detect for FMA4 support, CPUID needs to be called by setting the value
0x80000001 in $eax register. FMA4 support only found on AMD cpu's so this
requires special handling and can't be detected through generic logic. Earlier
detection logic was executed after the CPUID execution but now detection logic
is executed before the CPUID execution.

GLIBC-2.23 release also fails to detect so I am filing this defect by selecting
the version 2.23 and if this is not OK then will select version 2.24 for this
defect.

The code diff is given below.

---------------------- DIFF ------------------------
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c8f81ef..1787716 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -19,79 +19,23 @@
 #include <cpuid.h>
 #include <cpu-features.h>

-static void
+static inline void
 get_common_indeces (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model)
 {
-  if (family)
+  unsigned int eax;
+  __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+          cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+          cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+  GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax;
+  *family = (eax >> 8) & 0x0f;
+  *model = (eax >> 4) & 0x0f;
+  *extended_model = (eax >> 12) & 0xf0;
+  if (*family == 0x0f)
     {
-      unsigned int eax;
-      __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
-              cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
-              cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
-      cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
-      *family = (eax >> 8) & 0x0f;
-      *model = (eax >> 4) & 0x0f;
-      *extended_model = (eax >> 12) & 0xf0;
-      if (*family == 0x0f)
-       {
-         *family += (eax >> 20) & 0xff;
-         *model += *extended_model;
-       }
-    }
-
-  if (cpu_features->max_cpuid >= 7)
-    __cpuid_count (7, 0,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
-
-  /* Can we call xgetbv?  */
-  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
-    {
-      unsigned int xcrlow;
-      unsigned int xcrhigh;
-      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-      /* Is YMM and XMM state usable?  */
-      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
-         (bit_YMM_state | bit_XMM_state))
-       {
-         /* Determine if AVX is usable.  */
-         if (CPU_FEATURES_CPU_P (cpu_features, AVX))
-           cpu_features->feature[index_arch_AVX_Usable]
-             |= bit_arch_AVX_Usable;
-         /* Determine if AVX2 is usable.  */
-         if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
-           cpu_features->feature[index_arch_AVX2_Usable]
-             |= bit_arch_AVX2_Usable;
-         /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
-            ZMM16-ZMM31 state are enabled.  */
-         if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
-                        | bit_ZMM16_31_state)) ==
-             (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
-           {
-             /* Determine if AVX512F is usable.  */
-             if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
-               {
-                 cpu_features->feature[index_arch_AVX512F_Usable]
-                   |= bit_arch_AVX512F_Usable;
-                 /* Determine if AVX512DQ is usable.  */
-                 if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
-                   cpu_features->feature[index_arch_AVX512DQ_Usable]
-                     |= bit_arch_AVX512DQ_Usable;
-               }
-           }
-         /* Determine if FMA is usable.  */
-         if (CPU_FEATURES_CPU_P (cpu_features, FMA))
-           cpu_features->feature[index_arch_FMA_Usable]
-             |= bit_arch_FMA_Usable;
-         /* Determine if FMA4 is usable.  */
-         if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
-           cpu_features->feature[index_arch_FMA4_Usable]
-             |= bit_arch_FMA4_Usable;
-       }
+      *family += (eax >> 20) & 0xff;
+      *model += *extended_model;
     }
 }
@@ -227,19 +165,73 @@ init_cpu_features (struct cpu_features *cpu_features)
        }
     }
   else
-    {
-      kind = arch_kind_other;
-      get_common_indeces (cpu_features, NULL, NULL, NULL);
-    }
+    kind = arch_kind_other;

   /* Support i586 if CX8 is available.  */
-  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
+  if (HAS_CPU_FEATURE (CX8))
     cpu_features->feature[index_arch_I586] |= bit_arch_I586;

   /* Support i686 if CMOV is available.  */
-  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
+  if (HAS_CPU_FEATURE (CMOV))
     cpu_features->feature[index_arch_I686] |= bit_arch_I686;
+  if (cpu_features->max_cpuid >= 7)
+    __cpuid_count (7, 0,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+
+  /* Can we call xgetbv?  */
+  if (HAS_CPU_FEATURE (OSXSAVE))
+    {
+      unsigned int xcrlow;
+      unsigned int xcrhigh;
+      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+      /* Is YMM and XMM state usable?  */
+      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+         (bit_YMM_state | bit_XMM_state))
+       {
+         /* Determine if AVX is usable.  */
+         if (HAS_CPU_FEATURE (AVX))
+           cpu_features->feature[index_arch_AVX_Usable]
+             |= bit_arch_AVX_Usable;
+#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
+# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
+#endif
+         /* Determine if AVX2 is usable.  Unaligned load with 256-bit
+            AVX registers are faster on processors with AVX2.  */
+         if (HAS_CPU_FEATURE (AVX2))
+           cpu_features->feature[index_arch_AVX2_Usable]
+             |= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load;
+         /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+            ZMM16-ZMM31 state are enabled.  */
+         if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
+                        | bit_ZMM16_31_state)) ==
+             (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
+           {
+             /* Determine if AVX512F is usable.  */
+             if (HAS_CPU_FEATURE (AVX512F))
+               {
+                 cpu_features->feature[index_arch_AVX512F_Usable]
+                   |= bit_arch_AVX512F_Usable;
+                 /* Determine if AVX512DQ is usable.  */
+                 if (HAS_CPU_FEATURE (AVX512DQ))
+                   cpu_features->feature[index_arch_AVX512DQ_Usable]
+                     |= bit_arch_AVX512DQ_Usable;
+               }
+           }
+         /* Determine if FMA is usable.  */
+         if (HAS_CPU_FEATURE (FMA))
+           cpu_features->feature[index_arch_FMA_Usable]
+             |= bit_arch_FMA_Usable;
+         /* Determine if FMA4 is usable.  */
+         if (HAS_CPU_FEATURE (FMA4))
+           cpu_features->feature[index_arch_FMA4_Usable]
+             |= bit_arch_FMA4_Usable;
+       }
+    }
+
 #if !HAS_CPUID
 no_cpuid:
 #endif

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]