This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX


There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
the multiarch code:

1. We should disable FMA4 if AVX is not available (due to disabled
   OSXSAVE)

2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
   from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
   revert the change for AVX. We really have to disable AVX if it's
   not usable.

The issue is easily reproducible under Xen for the reporter.

Also, the two commits introduced YMM_Usable and later removed its usage,
but did not remove the code from the headers; I'm cleaning this up as well.

I'm appending a patch that I tested on Linux/x86-64 (without Xen).

Ok to commit?

Andreas

2012-05-08  Andreas Jaeger  <aj@suse.de>

	* sysdeps/x86_64/multiarch/init-arch.h (bit_YMM_Usable): Remove,
	it's unused.
	(index_YMM_Usable): Likewise.
	(HAS_YMM_USABLE): Likewise.

2012-05-08  Jim Westfall  <jwestfall@surrealistic.net>

	[BZ #14059]
	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
	Disable FMA4 if AVX is disabled.
	(__init_cpu_features): Revert broken check for bit_YMM_Usable.

diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..dbb091f 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -146,15 +146,20 @@ __init_cpu_features (void)
   if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
     {
       /* Reset the AVX bit in case OSXSAVE is disabled.  */
-      if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
-	  && ({ unsigned int xcrlow;
-		unsigned int xcrhigh;
-		asm ("xgetbv"
-		     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-		(xcrlow & 6) == 6; }))
-	__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+      if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
+         || ({ unsigned int xcrlow;
+             unsigned int xcrhigh;
+             asm ("xgetbv"
+                  : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+             (xcrlow & 6) != 6; }))
+       __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
     }
 
+  /* FMA4 depends on AVX support.  */
+  if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
+      && (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) == 0)
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx &= ~bit_FMA4;
+
   __cpu_features.family = family;
   __cpu_features.model = model;
   atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..2bddea3 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,7 +21,6 @@
 #define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
-#define bit_YMM_Usable			(1 << 6)
 
 #define bit_SSE2	(1 << 26)
 #define bit_SSSE3	(1 << 9)
@@ -49,7 +48,6 @@
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -127,7 +125,6 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
-# define index_YMM_Usable		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +139,4 @@ extern const struct cpu_features *__get_cpu_features (void)
 
 # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 
-# define HAS_YMM_USABLE		HAS_ARCH_FEATURE (YMM_Usable)
-
 #endif	/* __ASSEMBLER__ */

-- 
 Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
  SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
   GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
    GPG fingerprint = 93A3 365E CE47 B889 DF7F  FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]