This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/2] Append AVX2 instruction detection


From: sihai <sihai.ysh@alibaba-inc.com>

This patch adds support for detecting AVX2 instructions
on CPU models greater than or equal to 0x3c.

Any comments are appreciated.

Thanks
Sihai
---
 sysdeps/x86_64/multiarch/init-arch.c | 30 ++++++++++++++++++++++++++++--
 sysdeps/x86_64/multiarch/init-arch.h | 11 +++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index fed5ab8..1902a25 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -38,6 +38,24 @@ get_common_indeces (unsigned int *family, unsigned int *model)
   *model = (eax >> 4) & 0x0f;
 }
 
+static void
+get_indeces_7(void)
+{
+    unsigned int eax, ebx, ecx, edx;
+	
+    /* Query CPUID index 7 with ECX = 0.  Per the author, __cpuid in the
+         standard header is buggy for index 0x7, so we use our own version.  */
+    eax = 7;
+    ecx = 0;
+    asm volatile("cpuid"
+		     : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
+    
+    /* Save all four registers; only the AVX2 bit in ebx is tested so far.  */
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax = eax;
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx = ebx;
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx = ecx;
+    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx = edx;
+}
 
 void
 __init_cpu_features (void)
@@ -96,6 +114,7 @@ __init_cpu_features (void)
 	    case 0x2c:
 	    case 0x2e:
 	    case 0x2f:
+	    case 0x3c:
 	      /* Rep string instructions, copy backward, unaligned loads
 		 and pminub are fast on Intel Core i3, i5 and i7.  */
 #if index_Fast_Rep_String != index_Fast_Copy_Backward
@@ -112,9 +131,16 @@ __init_cpu_features (void)
 		    | bit_Fast_Copy_Backward
 		    | bit_Fast_Unaligned_Load
 		    | bit_Prefer_PMINUB_for_stringop);
-	      break;
+
+	      /* Haswell and later platforms should support CPUID index 7.  */
+	      if (model >= 0x3c) {
+		    get_indeces_7();
+		    if (HAS_AVX2)
+			__cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
+	      }
+	      break;		
 	    }
-	}
+	  }
     }
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index f33f1c8..83f7c05 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -24,6 +24,7 @@
 #define bit_AVX_Usable			(1 << 6)
 #define bit_FMA_Usable			(1 << 7)
 #define bit_FMA4_Usable			(1 << 8)
+#define bit_AVX2_Usable			(1 << 9)
 
 /* CPUID Feature flags.  */
 #define bit_SSE2	(1 << 26)
@@ -35,6 +36,7 @@
 #define bit_POPCOUNT	(1 << 23)
 #define bit_FMA		(1 << 12)
 #define bit_FMA4	(1 << 16)
+#define bit_AVX2	(1 << 5)
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state  (1 << 1)
@@ -49,6 +51,7 @@
 # define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -59,6 +62,7 @@
 # define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable		FEATURE_INDEX_7*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -67,6 +71,7 @@
 enum
   {
     COMMON_CPUID_INDEX_1 = 0,
+    COMMON_CPUID_INDEX_7,
     COMMON_CPUID_INDEX_80000001,	/* for AMD */
     /* Keep the following line at the end.  */
     COMMON_CPUID_INDEX_MAX
@@ -75,6 +80,7 @@ enum
 enum
   {
     FEATURE_INDEX_1 = 0,
+    FEATURE_INDEX_7,
     /* Keep the following line at the end.  */
     FEATURE_INDEX_MAX
   };
@@ -137,6 +143,8 @@ extern const struct cpu_features *__get_cpu_features (void)
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
 # define CPUID_FMA4 \
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
+# define CPUID_AVX2 \
+	  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
 
 /* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -144,6 +152,8 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSSE3	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
+# define HAS_AVX2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
+
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
@@ -153,6 +163,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_AVX_Usable		FEATURE_INDEX_1
 # define index_FMA_Usable		FEATURE_INDEX_1
 # define index_FMA4_Usable		FEATURE_INDEX_1
+# define index_AVX2_Usable		FEATURE_INDEX_7
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-- 
1.8.1.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]