This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.



GNU C Library master sources branch master updated. glibc-2.23-118-gf781a9e


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  f781a9e96138d8839663af5e88649ab1fbed74f8 (commit)
      from  b87e41378beca3c98ec3464d64835e66cc788497 (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f781a9e96138d8839663af5e88649ab1fbed74f8

commit f781a9e96138d8839663af5e88649ab1fbed74f8
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Tue Mar 22 07:46:56 2016 -0700

    Set index_arch_AVX_Fast_Unaligned_Load only for Intel processors
    
    Since only Intel processors with AVX2 have fast unaligned load, we
    should set index_arch_AVX_Fast_Unaligned_Load only for Intel processors.
    
    Move AVX, AVX2, AVX512, FMA and FMA4 detection into get_common_indeces
    and call get_common_indeces for other processors.
    
    Add CPU_FEATURES_CPU_P and CPU_FEATURES_ARCH_P to avoid loading
    GLRO(dl_x86_cpu_features) in cpu-features.c.
    
    	[BZ #19583]
    	* sysdeps/x86/cpu-features.c (get_common_indeces): Remove
    	inline.  Check family before setting family, model and
    	extended_model.  Set AVX, AVX2, AVX512, FMA and FMA4 usable
    	bits here.
    	(init_cpu_features): Replace HAS_CPU_FEATURE and
    	HAS_ARCH_FEATURE with CPU_FEATURES_CPU_P and
    	CPU_FEATURES_ARCH_P.  Set index_arch_AVX_Fast_Unaligned_Load
    	for Intel processors with usable AVX2.  Call get_common_indeces
    	for other processors with family == NULL.
    	* sysdeps/x86/cpu-features.h (CPU_FEATURES_CPU_P): New macro.
    	(CPU_FEATURES_ARCH_P): Likewise.
    	(HAS_CPU_FEATURE): Use CPU_FEATURES_CPU_P.
    	(HAS_ARCH_FEATURE): Use CPU_FEATURES_ARCH_P.
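
The point of the new macros is that CPU_FEATURES_CPU_P and
CPU_FEATURES_ARCH_P test a feature bit through a cpu_features pointer
the caller already holds, whereas HAS_CPU_FEATURE and HAS_ARCH_FEATURE
go through __get_cpu_features () and therefore load
GLRO(dl_x86_cpu_features).  The standalone sketch below only shows the
shape of that lookup; it is not part of the patch, and the simplified
struct layout, the OSXSAVE index/bit stand-ins and main are illustrative
values chosen for the example, not the real glibc definitions.

/* Minimal sketch, not the glibc sources: mirrors how CPU_FEATURES_CPU_P
   reads a CPUID feature bit through an explicit pointer, so code inside
   cpu-features.c never needs to load GLRO(dl_x86_cpu_features) the way
   __get_cpu_features () would.  */
#include <stdio.h>

struct cpuid_registers { unsigned int eax, ebx, ecx, edx; };
struct cpu_features { struct cpuid_registers cpuid[1]; };

#define index_cpu_OSXSAVE 0            /* stand-in for COMMON_CPUID_INDEX_1 */
#define reg_OSXSAVE       ecx          /* OSXSAVE is reported in ECX */
#define bit_cpu_OSXSAVE   (1u << 27)   /* CPUID.1:ECX bit 27 */

/* Same shape as the macro added by this commit.  */
#define CPU_FEATURES_CPU_P(ptr, name) \
  ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)

int
main (void)
{
  struct cpu_features features = { { { 0, 0, bit_cpu_OSXSAVE, 0 } } };
  const struct cpu_features *cpu_features = &features;

  /* The pointer is explicit, so no global data is consulted.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    puts ("OSXSAVE usable: xgetbv may be executed");
  return 0;
}

By contrast, HAS_CPU_FEATURE (name) now simply expands to
CPU_FEATURES_CPU_P (__get_cpu_features (), name), as the
cpu-features.h hunk at the end of the diff shows.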

diff --git a/ChangeLog b/ChangeLog
index 36360d9..522bc4f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2016-03-22  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #19583]
+	* sysdeps/x86/cpu-features.c (get_common_indeces): Remove
+	inline.  Check family before setting family, model and
+	extended_model.  Set AVX, AVX2, AVX512, FMA and FMA4 usable
+	bits here.
+	(init_cpu_features): Replace HAS_CPU_FEATURE and
+	HAS_ARCH_FEATURE with CPU_FEATURES_CPU_P and
+	CPU_FEATURES_ARCH_P.  Set index_arch_AVX_Fast_Unaligned_Load
+	for Intel processors with usable AVX2.  Call get_common_indeces
+	for other processors with family == NULL.
+	* sysdeps/x86/cpu-features.h (CPU_FEATURES_CPU_P): New macro.
+	(CPU_FEATURES_ARCH_P): Likewise.
+	(HAS_CPU_FEATURE): Use CPU_FEATURES_CPU_P.
+	(HAS_ARCH_FEATURE): Use CPU_FEATURES_ARCH_P.
+
 2016-03-22  Samuel Thibault  <samuel.thibault@ens-lyon.org>
 
 	* malloc/Makefile ($(objpfx)tst-malloc-backtrace,
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 1787716..c8f81ef 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -19,23 +19,79 @@
 #include <cpuid.h>
 #include <cpu-features.h>
 
-static inline void
+static void
 get_common_indeces (struct cpu_features *cpu_features,
 		    unsigned int *family, unsigned int *model,
 		    unsigned int *extended_model)
 {
-  unsigned int eax;
-  __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
-	   cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
-	   cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
-  GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax;
-  *family = (eax >> 8) & 0x0f;
-  *model = (eax >> 4) & 0x0f;
-  *extended_model = (eax >> 12) & 0xf0;
-  if (*family == 0x0f)
+  if (family)
     {
-      *family += (eax >> 20) & 0xff;
-      *model += *extended_model;
+      unsigned int eax;
+      __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+	       cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+	       cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+      cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
+      *family = (eax >> 8) & 0x0f;
+      *model = (eax >> 4) & 0x0f;
+      *extended_model = (eax >> 12) & 0xf0;
+      if (*family == 0x0f)
+	{
+	  *family += (eax >> 20) & 0xff;
+	  *model += *extended_model;
+	}
+    }
+
+  if (cpu_features->max_cpuid >= 7)
+    __cpuid_count (7, 0,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+
+  /* Can we call xgetbv?  */
+  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
+    {
+      unsigned int xcrlow;
+      unsigned int xcrhigh;
+      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+      /* Is YMM and XMM state usable?  */
+      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+	  (bit_YMM_state | bit_XMM_state))
+	{
+	  /* Determine if AVX is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
+	    cpu_features->feature[index_arch_AVX_Usable]
+	      |= bit_arch_AVX_Usable;
+	  /* Determine if AVX2 is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+	    cpu_features->feature[index_arch_AVX2_Usable]
+	      |= bit_arch_AVX2_Usable;
+	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+	     ZMM16-ZMM31 state are enabled.  */
+	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
+			 | bit_ZMM16_31_state)) ==
+	      (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
+	    {
+	      /* Determine if AVX512F is usable.  */
+	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
+		{
+		  cpu_features->feature[index_arch_AVX512F_Usable]
+		    |= bit_arch_AVX512F_Usable;
+		  /* Determine if AVX512DQ is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
+		    cpu_features->feature[index_arch_AVX512DQ_Usable]
+		      |= bit_arch_AVX512DQ_Usable;
+		}
+	    }
+	  /* Determine if FMA is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, FMA))
+	    cpu_features->feature[index_arch_FMA_Usable]
+	      |= bit_arch_FMA_Usable;
+	  /* Determine if FMA4 is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
+	    cpu_features->feature[index_arch_FMA4_Usable]
+	      |= bit_arch_FMA4_Usable;
+	}
     }
 }
 
@@ -135,6 +191,12 @@ init_cpu_features (struct cpu_features *cpu_features)
 	      break;
 	    }
 	}
+
+      /* Unaligned load with 256-bit AVX registers are faster on
+	 Intel processors with AVX2.  */
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+	  |= bit_arch_AVX_Fast_Unaligned_Load;
     }
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
@@ -165,73 +227,19 @@ init_cpu_features (struct cpu_features *cpu_features)
 	}
     }
   else
-    kind = arch_kind_other;
+    {
+      kind = arch_kind_other;
+      get_common_indeces (cpu_features, NULL, NULL, NULL);
+    }
 
   /* Support i586 if CX8 is available.  */
-  if (HAS_CPU_FEATURE (CX8))
+  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
     cpu_features->feature[index_arch_I586] |= bit_arch_I586;
 
   /* Support i686 if CMOV is available.  */
-  if (HAS_CPU_FEATURE (CMOV))
+  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
     cpu_features->feature[index_arch_I686] |= bit_arch_I686;
 
-  if (cpu_features->max_cpuid >= 7)
-    __cpuid_count (7, 0,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
-
-  /* Can we call xgetbv?  */
-  if (HAS_CPU_FEATURE (OSXSAVE))
-    {
-      unsigned int xcrlow;
-      unsigned int xcrhigh;
-      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-      /* Is YMM and XMM state usable?  */
-      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
-	  (bit_YMM_state | bit_XMM_state))
-	{
-	  /* Determine if AVX is usable.  */
-	  if (HAS_CPU_FEATURE (AVX))
-	    cpu_features->feature[index_arch_AVX_Usable]
-	      |= bit_arch_AVX_Usable;
-#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
-# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
-#endif
-	  /* Determine if AVX2 is usable.  Unaligned load with 256-bit
-	     AVX registers are faster on processors with AVX2.  */
-	  if (HAS_CPU_FEATURE (AVX2))
-	    cpu_features->feature[index_arch_AVX2_Usable]
-	      |= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load;
-	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
-	     ZMM16-ZMM31 state are enabled.  */
-	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
-			 | bit_ZMM16_31_state)) ==
-	      (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
-	    {
-	      /* Determine if AVX512F is usable.  */
-	      if (HAS_CPU_FEATURE (AVX512F))
-		{
-		  cpu_features->feature[index_arch_AVX512F_Usable]
-		    |= bit_arch_AVX512F_Usable;
-		  /* Determine if AVX512DQ is usable.  */
-		  if (HAS_CPU_FEATURE (AVX512DQ))
-		    cpu_features->feature[index_arch_AVX512DQ_Usable]
-		      |= bit_arch_AVX512DQ_Usable;
-		}
-	    }
-	  /* Determine if FMA is usable.  */
-	  if (HAS_CPU_FEATURE (FMA))
-	    cpu_features->feature[index_arch_FMA_Usable]
-	      |= bit_arch_FMA_Usable;
-	  /* Determine if FMA4 is usable.  */
-	  if (HAS_CPU_FEATURE (FMA4))
-	    cpu_features->feature[index_arch_FMA4_Usable]
-	      |= bit_arch_FMA4_Usable;
-	}
-    }
-
 #if !HAS_CPUID
 no_cpuid:
 #endif
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 0624a92..e06eb7e 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -204,11 +204,17 @@ extern const struct cpu_features *__get_cpu_features (void)
 # endif
 
 
+/* Only used directly in cpu-features.c.  */
+# define CPU_FEATURES_CPU_P(ptr, name) \
+  ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
+# define CPU_FEATURES_ARCH_P(ptr, name) \
+  ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
+
 /* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_CPU_FEATURE(name) \
-  ((__get_cpu_features ()->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
+   CPU_FEATURES_CPU_P (__get_cpu_features (), name)
 # define HAS_ARCH_FEATURE(name) \
-  ((__get_cpu_features ()->feature[index_arch_##name] & (bit_arch_##name)) != 0)
+   CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
 
 # define index_cpu_CX8		COMMON_CPUID_INDEX_1
 # define index_cpu_CMOV		COMMON_CPUID_INDEX_1

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                  |   17 +++++
 sysdeps/x86/cpu-features.c |  152 +++++++++++++++++++++++---------------------
 sysdeps/x86/cpu-features.h |   10 ++-
 3 files changed, 105 insertions(+), 74 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

