This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch release/2.24/master updated. glibc-2.24-47-ga293317


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, release/2.24/master has been updated
       via  a29331702f3b58a2f4afb051b219a7e0deb1f487 (commit)
       via  5f85ab28690a94355a0eaf75206cdf237fe6009d (commit)
      from  b2e8c40afcb83f9a6dbbb543ce0951b6c890d350 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a29331702f3b58a2f4afb051b219a7e0deb1f487

commit a29331702f3b58a2f4afb051b219a7e0deb1f487
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Apr 28 10:27:22 2017 -0700

    x86: Use AVX2 memcpy/memset on Skylake server [BZ #21396]
    
    On Skylake server, AVX512 load/store instructions in memcpy/memset may
    lead to lower CPU turbo frequency in certain situations.  Use of AVX2
    in memcpy/memset has been observed to have improved overall performance
    in many workloads due to the higher frequency.
    
    Since AVX512ER is unique to Xeon Phi, this patch sets Prefer_No_AVX512
    if AVX512ER isn't available so that AVX2 versions of memcpy/memset are
    used on Skylake server.
    
    	[BZ #21396]
    	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
    	Prefer_No_AVX512 if AVX512ER isn't available.
    	* sysdeps/x86/cpu-features.h (bit_arch_Prefer_No_AVX512): New.
    	(index_arch_Prefer_No_AVX512): Likewise.
    	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Don't use
    	AVX512 version if Prefer_No_AVX512 is set.
    	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk):
    	Likewise.
    	* sysdeps/x86_64/multiarch/memmove.S (__libc_memmove): Likewise.
    	* sysdeps/x86_64/multiarch/memmove_chk.S (__memmove_chk):
    	Likewise.
    	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise.
    	* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk):
    	Likewise.
    	* sysdeps/x86_64/multiarch/memset.S (memset): Likewise.
    	* sysdeps/x86_64/multiarch/memset_chk.S (__memset_chk):
    	Likewise.
    
    (cherry picked from commit 4cb334c4d6249686653137ec273d081371b3672d)

diff --git a/ChangeLog b/ChangeLog
index eecd213..8734292 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,26 @@
 2017-04-28  H.J. Lu  <hongjiu.lu@intel.com>
 
+	[BZ #21396]
+	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
+	Prefer_No_AVX512 if AVX512ER isn't available.
+	* sysdeps/x86/cpu-features.h (bit_arch_Prefer_No_AVX512): New.
+	(index_arch_Prefer_No_AVX512): Likewise.
+	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Don't use
+	AVX512 version if Prefer_No_AVX512 is set.
+	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk):
+	Likewise.
+	* sysdeps/x86_64/multiarch/memmove.S (__libc_memmove): Likewise.
+	* sysdeps/x86_64/multiarch/memmove_chk.S (__memmove_chk):
+	Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk):
+	Likewise.
+	* sysdeps/x86_64/multiarch/memset.S (memset): Likewise.
+	* sysdeps/x86_64/multiarch/memset_chk.S (__memset_chk):
+	Likewise.
+
+2017-04-28  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
 	Prefer_No_VZEROUPPER if AVX512ER is available.
 	* sysdeps/x86/cpu-features.h
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 8f8cfce..d1ee922 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -205,10 +205,14 @@ init_cpu_features (struct cpu_features *cpu_features)
 	  |= bit_arch_AVX_Fast_Unaligned_Load;
 
       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
-         if AVX512ER is available.  */
+         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
+	 frequency if AVX512ER isn't available.  */
       if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
 	cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
 	  |= bit_arch_Prefer_No_VZEROUPPER;
+      else
+	cpu_features->feature[index_arch_Prefer_No_AVX512]
+	  |= bit_arch_Prefer_No_AVX512;
 
       /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
          If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt.  */
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 265bc6c..2609ac0 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -39,6 +39,7 @@
 #define bit_arch_Prefer_ERMS			(1 << 19)
 #define bit_arch_Use_dl_runtime_resolve_opt	(1 << 20)
 #define bit_arch_Use_dl_runtime_resolve_slow	(1 << 21)
+#define bit_arch_Prefer_No_AVX512		(1 << 22)
 
 /* CPUID Feature flags.  */
 
@@ -116,6 +117,7 @@
 # define index_arch_Prefer_ERMS		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -298,6 +300,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_Prefer_ERMS		FEATURE_INDEX_1
 # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
 # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
+# define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index b867759..ea4ec70 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -32,6 +32,8 @@ ENTRY(__new_memcpy)
 	lea	__memcpy_erms(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Prefer_ERMS)
 	jnz	2f
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 9d92c8a..26b49de 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -30,6 +30,8 @@
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__memcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/memmove.S
index ff5e041..ef92afd 100644
--- a/sysdeps/x86_64/multiarch/memmove.S
+++ b/sysdeps/x86_64/multiarch/memmove.S
@@ -30,6 +30,8 @@ ENTRY(__libc_memmove)
 	lea	__memmove_erms(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Prefer_ERMS)
 	jnz	2f
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__memmove_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.S b/sysdeps/x86_64/multiarch/memmove_chk.S
index 7f86120..a9129c4 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.S
+++ b/sysdeps/x86_64/multiarch/memmove_chk.S
@@ -29,6 +29,8 @@
 ENTRY(__memmove_chk)
 	.type	__memmove_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__memmove_chk_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 5197068..87c8299 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -32,6 +32,8 @@ ENTRY(__mempcpy)
 	lea	__mempcpy_erms(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Prefer_ERMS)
 	jnz	2f
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 9e49f6f..642c679 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -30,6 +30,8 @@
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	1f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
 	lea	__mempcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index 96e9934..eae39e2 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -41,6 +41,8 @@ ENTRY(memset)
 	jnz	L(AVX512F)
 	lea	__memset_avx2_unaligned(%rip), %RAX_LP
 L(AVX512F):
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	2f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	2f
 	lea	__memset_avx512_no_vzeroupper(%rip), %RAX_LP
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 2efe6ed..38d7bef 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -38,6 +38,8 @@ ENTRY(__memset_chk)
 	jnz	L(AVX512F)
 	lea	__memset_chk_avx2_unaligned(%rip), %RAX_LP
 L(AVX512F):
+	HAS_ARCH_FEATURE (Prefer_No_AVX512)
+	jnz	2f
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	2f
 	lea	__memset_chk_avx512_no_vzeroupper(%rip), %RAX_LP

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5f85ab28690a94355a0eaf75206cdf237fe6009d

commit 5f85ab28690a94355a0eaf75206cdf237fe6009d
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Apr 28 10:26:58 2017 -0700

    x86: Set Prefer_No_VZEROUPPER if AVX512ER is available
    
    AVX512ER won't be implemented in any Xeon processors and will be in
    all Xeon Phi processors.  Don't check CPU model number when setting
    Prefer_No_VZEROUPPER for Xeon Phi.  Instead, set Prefer_No_VZEROUPPER
    if AVX512ER is available.  It works with current and future Xeon Phi
    and non-Xeon Phi processors.
    
    	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
    	Prefer_No_VZEROUPPER if AVX512ER is available.
    	* sysdeps/x86/cpu-features.h
    	(bit_cpu_AVX512PF): New.
    	(bit_cpu_AVX512ER): Likewise.
    	(bit_cpu_AVX512CD): Likewise.
    	(bit_cpu_AVX512BW): Likewise.
    	(bit_cpu_AVX512VL): Likewise.
    	(index_cpu_AVX512PF): Likewise.
    	(index_cpu_AVX512ER): Likewise.
    	(index_cpu_AVX512CD): Likewise.
    	(index_cpu_AVX512BW): Likewise.
    	(index_cpu_AVX512VL): Likewise.
    	(reg_AVX512PF): Likewise.
    	(reg_AVX512ER): Likewise.
    	(reg_AVX512CD): Likewise.
    	(reg_AVX512BW): Likewise.
    	(reg_AVX512VL): Likewise.
    
    (cherry picked from commit 1c53cb49de6d82d9469ccbd5aa0c55924502bd8b)

diff --git a/ChangeLog b/ChangeLog
index 13f809d..eecd213 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2017-04-28  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
+	Prefer_No_VZEROUPPER if AVX512ER is available.
+	* sysdeps/x86/cpu-features.h
+	(bit_cpu_AVX512PF): New.
+	(bit_cpu_AVX512ER): Likewise.
+	(bit_cpu_AVX512CD): Likewise.
+	(bit_cpu_AVX512BW): Likewise.
+	(bit_cpu_AVX512VL): Likewise.
+	(index_cpu_AVX512PF): Likewise.
+	(index_cpu_AVX512ER): Likewise.
+	(index_cpu_AVX512CD): Likewise.
+	(index_cpu_AVX512BW): Likewise.
+	(index_cpu_AVX512VL): Likewise.
+	(reg_AVX512PF): Likewise.
+	(reg_AVX512ER): Likewise.
+	(reg_AVX512CD): Likewise.
+	(reg_AVX512BW): Likewise.
+	(reg_AVX512VL): Likewise.
+
 2017-01-05  Joseph Myers  <joseph@codesourcery.com>
 
 	[BZ #21026]
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 11b9af2..8f8cfce 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -133,8 +133,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 
 	    case 0x57:
 	      /* Knights Landing.  Enable Silvermont optimizations.  */
-	      cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
-		|= bit_arch_Prefer_No_VZEROUPPER;
 
 	    case 0x5c:
 	    case 0x5f:
@@ -206,6 +204,12 @@ init_cpu_features (struct cpu_features *cpu_features)
 	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
 	  |= bit_arch_AVX_Fast_Unaligned_Load;
 
+      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
+         if AVX512ER is available.  */
+      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+	cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
+	  |= bit_arch_Prefer_No_VZEROUPPER;
+
       /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
          If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt.  */
       cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index a8b5a73..265bc6c 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -62,6 +62,11 @@
 #define bit_cpu_AVX2		(1 << 5)
 #define bit_cpu_AVX512F		(1 << 16)
 #define bit_cpu_AVX512DQ	(1 << 17)
+#define bit_cpu_AVX512PF	(1 << 26)
+#define bit_cpu_AVX512ER	(1 << 27)
+#define bit_cpu_AVX512CD	(1 << 28)
+#define bit_cpu_AVX512BW	(1 << 30)
+#define bit_cpu_AVX512VL	(1u << 31)
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state		(1 << 1)
@@ -236,6 +241,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_cpu_AVX2		COMMON_CPUID_INDEX_7
 # define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
 # define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
+# define index_cpu_AVX512PF	COMMON_CPUID_INDEX_7
+# define index_cpu_AVX512ER	COMMON_CPUID_INDEX_7
+# define index_cpu_AVX512CD	COMMON_CPUID_INDEX_7
+# define index_cpu_AVX512BW	COMMON_CPUID_INDEX_7
+# define index_cpu_AVX512VL	COMMON_CPUID_INDEX_7
 # define index_cpu_ERMS		COMMON_CPUID_INDEX_7
 # define index_cpu_RTM		COMMON_CPUID_INDEX_7
 # define index_cpu_FMA		COMMON_CPUID_INDEX_1
@@ -254,6 +264,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define reg_AVX2		ebx
 # define reg_AVX512F		ebx
 # define reg_AVX512DQ		ebx
+# define reg_AVX512PF		ebx
+# define reg_AVX512ER		ebx
+# define reg_AVX512CD		ebx
+# define reg_AVX512BW		ebx
+# define reg_AVX512VL		ebx
 # define reg_ERMS		ebx
 # define reg_RTM		ebx
 # define reg_FMA		ecx

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                              |   42 ++++++++++++++++++++++++++++++++
 sysdeps/x86/cpu-features.c             |   12 +++++++-
 sysdeps/x86/cpu-features.h             |   18 +++++++++++++
 sysdeps/x86_64/multiarch/memcpy.S      |    2 +
 sysdeps/x86_64/multiarch/memcpy_chk.S  |    2 +
 sysdeps/x86_64/multiarch/memmove.S     |    2 +
 sysdeps/x86_64/multiarch/memmove_chk.S |    2 +
 sysdeps/x86_64/multiarch/mempcpy.S     |    2 +
 sysdeps/x86_64/multiarch/mempcpy_chk.S |    2 +
 sysdeps/x86_64/multiarch/memset.S      |    2 +
 sysdeps/x86_64/multiarch/memset_chk.S  |    2 +
 11 files changed, 86 insertions(+), 2 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]