This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.23-118-gf781a9e
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 22 Mar 2016 14:47:56 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.23-118-gf781a9e
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via f781a9e96138d8839663af5e88649ab1fbed74f8 (commit)
from b87e41378beca3c98ec3464d64835e66cc788497 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f781a9e96138d8839663af5e88649ab1fbed74f8
commit f781a9e96138d8839663af5e88649ab1fbed74f8
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Tue Mar 22 07:46:56 2016 -0700
Set index_arch_AVX_Fast_Unaligned_Load only for Intel processors
Since only Intel processors with AVX2 have fast unaligned load, we
should set index_arch_AVX_Fast_Unaligned_Load only for Intel processors.
Move AVX, AVX2, AVX512, FMA and FMA4 detection into get_common_indeces
and call get_common_indeces for other processors.
Add CPU_FEATURES_CPU_P and CPU_FEATURES_ARCH_P to aoid loading
GLRO(dl_x86_cpu_features) in cpu-features.c.
[BZ #19583]
* sysdeps/x86/cpu-features.c (get_common_indeces): Remove
inline. Check family before setting family, model and
extended_model. Set AVX, AVX2, AVX512, FMA and FMA4 usable
bits here.
(init_cpu_features): Replace HAS_CPU_FEATURE and
HAS_ARCH_FEATURE with CPU_FEATURES_CPU_P and
CPU_FEATURES_ARCH_P. Set index_arch_AVX_Fast_Unaligned_Load
for Intel processors with usable AVX2. Call get_common_indeces
for other processors with family == NULL.
* sysdeps/x86/cpu-features.h (CPU_FEATURES_CPU_P): New macro.
(CPU_FEATURES_ARCH_P): Likewise.
(HAS_CPU_FEATURE): Use CPU_FEATURES_CPU_P.
(HAS_ARCH_FEATURE): Use CPU_FEATURES_ARCH_P.
diff --git a/ChangeLog b/ChangeLog
index 36360d9..522bc4f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2016-03-22 H.J. Lu <hongjiu.lu@intel.com>
+
+ [BZ #19583]
+ * sysdeps/x86/cpu-features.c (get_common_indeces): Remove
+ inline. Check family before setting family, model and
+ extended_model. Set AVX, AVX2, AVX512, FMA and FMA4 usable
+ bits here.
+ (init_cpu_features): Replace HAS_CPU_FEATURE and
+ HAS_ARCH_FEATURE with CPU_FEATURES_CPU_P and
+ CPU_FEATURES_ARCH_P. Set index_arch_AVX_Fast_Unaligned_Load
+ for Intel processors with usable AVX2. Call get_common_indeces
+ for other processors with family == NULL.
+ * sysdeps/x86/cpu-features.h (CPU_FEATURES_CPU_P): New macro.
+ (CPU_FEATURES_ARCH_P): Likewise.
+ (HAS_CPU_FEATURE): Use CPU_FEATURES_CPU_P.
+ (HAS_ARCH_FEATURE): Use CPU_FEATURES_ARCH_P.
+
2016-03-22 Samuel Thibault <samuel.thibault@ens-lyon.org>
* malloc/Makefile ($(objpfx)tst-malloc-backtrace,
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 1787716..c8f81ef 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -19,23 +19,79 @@
#include <cpuid.h>
#include <cpu-features.h>
-static inline void
+static void
get_common_indeces (struct cpu_features *cpu_features,
unsigned int *family, unsigned int *model,
unsigned int *extended_model)
{
- unsigned int eax;
- __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
- GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax;
- *family = (eax >> 8) & 0x0f;
- *model = (eax >> 4) & 0x0f;
- *extended_model = (eax >> 12) & 0xf0;
- if (*family == 0x0f)
+ if (family)
{
- *family += (eax >> 20) & 0xff;
- *model += *extended_model;
+ unsigned int eax;
+ __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
+ *family = (eax >> 8) & 0x0f;
+ *model = (eax >> 4) & 0x0f;
+ *extended_model = (eax >> 12) & 0xf0;
+ if (*family == 0x0f)
+ {
+ *family += (eax >> 20) & 0xff;
+ *model += *extended_model;
+ }
+ }
+
+ if (cpu_features->max_cpuid >= 7)
+ __cpuid_count (7, 0,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+
+ /* Can we call xgetbv? */
+ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
+ {
+ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ /* Is YMM and XMM state usable? */
+ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ (bit_YMM_state | bit_XMM_state))
+ {
+ /* Determine if AVX is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX))
+ cpu_features->feature[index_arch_AVX_Usable]
+ |= bit_arch_AVX_Usable;
+ /* Determine if AVX2 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+ cpu_features->feature[index_arch_AVX2_Usable]
+ |= bit_arch_AVX2_Usable;
+ /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+ ZMM16-ZMM31 state are enabled. */
+ if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
+ | bit_ZMM16_31_state)) ==
+ (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
+ {
+ /* Determine if AVX512F is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
+ {
+ cpu_features->feature[index_arch_AVX512F_Usable]
+ |= bit_arch_AVX512F_Usable;
+ /* Determine if AVX512DQ is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
+ cpu_features->feature[index_arch_AVX512DQ_Usable]
+ |= bit_arch_AVX512DQ_Usable;
+ }
+ }
+ /* Determine if FMA is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, FMA))
+ cpu_features->feature[index_arch_FMA_Usable]
+ |= bit_arch_FMA_Usable;
+ /* Determine if FMA4 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
+ cpu_features->feature[index_arch_FMA4_Usable]
+ |= bit_arch_FMA4_Usable;
+ }
}
}
@@ -135,6 +191,12 @@ init_cpu_features (struct cpu_features *cpu_features)
break;
}
}
+
+ /* Unaligned load with 256-bit AVX registers are faster on
+ Intel processors with AVX2. */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+ |= bit_arch_AVX_Fast_Unaligned_Load;
}
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
@@ -165,73 +227,19 @@ init_cpu_features (struct cpu_features *cpu_features)
}
}
else
- kind = arch_kind_other;
+ {
+ kind = arch_kind_other;
+ get_common_indeces (cpu_features, NULL, NULL, NULL);
+ }
/* Support i586 if CX8 is available. */
- if (HAS_CPU_FEATURE (CX8))
+ if (CPU_FEATURES_CPU_P (cpu_features, CX8))
cpu_features->feature[index_arch_I586] |= bit_arch_I586;
/* Support i686 if CMOV is available. */
- if (HAS_CPU_FEATURE (CMOV))
+ if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
cpu_features->feature[index_arch_I686] |= bit_arch_I686;
- if (cpu_features->max_cpuid >= 7)
- __cpuid_count (7, 0,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
-
- /* Can we call xgetbv? */
- if (HAS_CPU_FEATURE (OSXSAVE))
- {
- unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- /* Is YMM and XMM state usable? */
- if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
- (bit_YMM_state | bit_XMM_state))
- {
- /* Determine if AVX is usable. */
- if (HAS_CPU_FEATURE (AVX))
- cpu_features->feature[index_arch_AVX_Usable]
- |= bit_arch_AVX_Usable;
-#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
-# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
-#endif
- /* Determine if AVX2 is usable. Unaligned load with 256-bit
- AVX registers are faster on processors with AVX2. */
- if (HAS_CPU_FEATURE (AVX2))
- cpu_features->feature[index_arch_AVX2_Usable]
- |= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load;
- /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- ZMM16-ZMM31 state are enabled. */
- if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
- | bit_ZMM16_31_state)) ==
- (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
- {
- /* Determine if AVX512F is usable. */
- if (HAS_CPU_FEATURE (AVX512F))
- {
- cpu_features->feature[index_arch_AVX512F_Usable]
- |= bit_arch_AVX512F_Usable;
- /* Determine if AVX512DQ is usable. */
- if (HAS_CPU_FEATURE (AVX512DQ))
- cpu_features->feature[index_arch_AVX512DQ_Usable]
- |= bit_arch_AVX512DQ_Usable;
- }
- }
- /* Determine if FMA is usable. */
- if (HAS_CPU_FEATURE (FMA))
- cpu_features->feature[index_arch_FMA_Usable]
- |= bit_arch_FMA_Usable;
- /* Determine if FMA4 is usable. */
- if (HAS_CPU_FEATURE (FMA4))
- cpu_features->feature[index_arch_FMA4_Usable]
- |= bit_arch_FMA4_Usable;
- }
- }
-
#if !HAS_CPUID
no_cpuid:
#endif
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 0624a92..e06eb7e 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -204,11 +204,17 @@ extern const struct cpu_features *__get_cpu_features (void)
# endif
+/* Only used directly in cpu-features.c. */
+# define CPU_FEATURES_CPU_P(ptr, name) \
+ ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
+# define CPU_FEATURES_ARCH_P(ptr, name) \
+ ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
+
/* HAS_* evaluates to true if we may use the feature at runtime. */
# define HAS_CPU_FEATURE(name) \
- ((__get_cpu_features ()->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
+ CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
- ((__get_cpu_features ()->feature[index_arch_##name] & (bit_arch_##name)) != 0)
+ CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
# define index_cpu_CX8 COMMON_CPUID_INDEX_1
# define index_cpu_CMOV COMMON_CPUID_INDEX_1
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 17 +++++
sysdeps/x86/cpu-features.c | 152 +++++++++++++++++++++++---------------------
sysdeps/x86/cpu-features.h | 10 ++-
3 files changed, 105 insertions(+), 74 deletions(-)
hooks/post-receive
--
GNU C Library master sources