This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/erms/2.23 updated. glibc-2.23-55-g5a4f524


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/erms/2.23 has been updated
       via  5a4f52493bc18322749ab15c4f5ae8deb7977201 (commit)
       via  5879566b641cf573099ce9200eb44148e40b9b34 (commit)
       via  756fa392db3930cec9330169aef2c647df365432 (commit)
       via  4c197f7a6b4ae1d57d1ad154ddae9db95b0abe86 (commit)
       via  5531ff0e927969eaf2f8fab12912e1a3a023ebc9 (commit)
       via  2af76a7db8bc8eafccf3c78ec503283aa5249277 (commit)
       via  9c7a58e7f94cae195e85003b21c6bb7d2a0ec9a5 (commit)
       via  9faebb60d146ade176ca5ad63e34ce67bacbec21 (commit)
       via  a29574292ac84a9882553aee676f0f5c73b064ed (commit)
       via  601a5ed0cfe7ec825664ccd8112918f879eeeebc (commit)
      from  9e1ddc1180ca0619d12b620b227726233a48b9bc (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5a4f52493bc18322749ab15c4f5ae8deb7977201

commit 5a4f52493bc18322749ab15c4f5ae8deb7977201
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri May 27 15:16:22 2016 -0700

    Count number of logical processors sharing L2 cache
    
    For Intel processors, when there are both L2 and L3 caches, SMT level
    type should be ued to count number of available logical processors
    sharing L2 cache.  If there is only L2 cache, core level type should
    be used to count number of available logical processors sharing L2
    cache.  Number of available logical processors sharing L2 cache should
    be used for non-inclusive L2 and L3 caches.
    
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Count number of
    	available logical processors with SMT level type sharing L2
    	cache for Intel processors.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 182426b..cf4f64b 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -499,11 +499,24 @@ init_cacheinfo (void)
       level  = 3;
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
 
+      /* Number of logical processors sharing L2 cache.  */
+      int threads_l2;
+
+      /* Number of logical processors sharing L3 cache.  */
+      int threads_l3;
+
       if (shared <= 0)
 	{
 	  /* Try L2 otherwise.  */
 	  level  = 2;
 	  shared = core;
+	  threads_l2 = 0;
+	  threads_l3 = -1;
+	}
+      else
+	{
+	  threads_l2 = 0;
+	  threads_l3 = 0;
 	}
 
       /* A value of 0 for the HTT bit indicates there is only a single
@@ -519,7 +532,8 @@ init_cacheinfo (void)
 
 	      int i = 0;
 
-	      /* Query until desired cache level is enumerated.  */
+	      /* Query until cache level 2 and 3 are enumerated.  */
+	      int check = 0x1 | (threads_l3 == 0) << 1;
 	      do
 		{
 		  __cpuid_count (4, i++, eax, ebx, ecx, edx);
@@ -530,24 +544,53 @@ init_cacheinfo (void)
 		     assume there is no such information.  */
 		  if ((eax & 0x1f) == 0)
 		    goto intel_bug_no_cache_info;
-		}
-	      while (((eax >> 5) & 0x7) != level);
 
-	      /* Check if cache is inclusive of lower cache levels.  */
-	      inclusive_cache = (edx & 0x2) != 0;
+		  switch ((eax >> 5) & 0x7)
+		    {
+		    default:
+		      break;
+		    case 2:
+		      if ((check & 0x1))
+			{
+			  /* Get maximum number of logical processors
+			     sharing L2 cache.  */
+			  threads_l2 = (eax >> 14) & 0x3ff;
+			  check &= ~0x1;
+			}
+		      break;
+		    case 3:
+		      if ((check & (0x1 << 1)))
+			{
+			  /* Get maximum number of logical processors
+			     sharing L3 cache.  */
+			  threads_l3 = (eax >> 14) & 0x3ff;
 
-	      threads = (eax >> 14) & 0x3ff;
+			  /* Check if L2 and L3 caches are inclusive.  */
+			  inclusive_cache = (edx & 0x2) != 0;
+			  check &= ~(0x1 << 1);
+			}
+		      break;
+		    }
+		}
+	      while (check);
 
-	      /* If max_cpuid >= 11, THREADS is the maximum number of
-		 addressable IDs for logical processors sharing the
-		 cache, instead of the maximum number of threads
+	      /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
+		 numbers of addressable IDs for logical processors sharing
+		 the cache, instead of the maximum number of threads
 		 sharing the cache.  */
-	      if (threads && max_cpuid >= 11)
+	      if (max_cpuid >= 11)
 		{
 		  /* Find the number of logical processors shipped in
 		     one core and apply count mask.  */
 		  i = 0;
-		  while (1)
+
+		  /* Count SMT only if there is L3 cache.  Always count
+		     core if there is no L3 cache.  */
+		  int count = ((threads_l2 > 0 && level == 3)
+			       | ((threads_l3 > 0
+				   || (threads_l2 > 0 && level == 2)) << 1));
+
+		  while (count)
 		    {
 		      __cpuid_count (11, i++, eax, ebx, ecx, edx);
 
@@ -555,36 +598,71 @@ init_cacheinfo (void)
 		      int type = ecx & 0xff00;
 		      if (shipped == 0 || type == 0)
 			break;
+		      else if (type == 0x100)
+			{
+			  /* Count SMT.  */
+			  if ((count & 0x1))
+			    {
+			      int count_mask;
+
+			      /* Compute count mask.  */
+			      asm ("bsr %1, %0"
+				   : "=r" (count_mask) : "g" (threads_l2));
+			      count_mask = ~(-1 << (count_mask + 1));
+			      threads_l2 = (shipped - 1) & count_mask;
+			      count &= ~0x1;
+			    }
+			}
 		      else if (type == 0x200)
 			{
-			  int count_mask;
-
-			  /* Compute count mask.  */
-			  asm ("bsr %1, %0"
-			       : "=r" (count_mask) : "g" (threads));
-			  count_mask = ~(-1 << (count_mask + 1));
-			  threads = (shipped - 1) & count_mask;
-			  break;
+			  /* Count core.  */
+			  if ((count & (0x1 << 1)))
+			    {
+			      int count_mask;
+			      int threads_core
+				= (level == 2 ? threads_l2 : threads_l3);
+
+			      /* Compute count mask.  */
+			      asm ("bsr %1, %0"
+				   : "=r" (count_mask) : "g" (threads_core));
+			      count_mask = ~(-1 << (count_mask + 1));
+			      threads_core = (shipped - 1) & count_mask;
+			      if (level == 2)
+				threads_l2 = threads_core;
+			      else
+				threads_l3 = threads_core;
+			      count &= ~(0x1 << 1);
+			    }
 			}
 		    }
 		}
-	      threads += 1;
-	      if (threads > 2 && level == 2 && family == 6)
+	      if (threads_l2 > 0)
+		threads_l2 += 1;
+	      if (threads_l3 > 0)
+		threads_l3 += 1;
+	      if (level == 2)
 		{
-		  switch (model)
+		  if (threads_l2)
 		    {
-		    case 0x37:
-		    case 0x4a:
-		    case 0x4d:
-		    case 0x5a:
-		    case 0x5d:
-		      /* Silvermont has L2 cache shared by 2 cores.  */
-		      threads = 2;
-		      break;
-		    default:
-		      break;
+		      threads = threads_l2;
+		      if (threads > 2 && family == 6)
+			switch (model)
+			  {
+			  case 0x37:
+			  case 0x4a:
+			  case 0x4d:
+			  case 0x5a:
+			  case 0x5d:
+			    /* Silvermont has L2 cache shared by 2 cores.  */
+			    threads = 2;
+			    break;
+			  default:
+			    break;
+			  }
 		    }
 		}
+	      else if (threads_l3)
+		threads = threads_l3;
 	    }
 	  else
 	    {
@@ -604,8 +682,12 @@ intel_bug_no_cache_info:
 	}
 
       /* Account for non-inclusive L2 and L3 caches.  */
-      if (level == 3 && !inclusive_cache)
-	shared += core;
+      if (!inclusive_cache)
+	{
+	  if (threads_l2 > 0)
+	    core /= threads_l2;
+	  shared += core;
+	}
     }
   /* This spells out "AuthenticAMD".  */
   else if (is_amd)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5879566b641cf573099ce9200eb44148e40b9b34

commit 5879566b641cf573099ce9200eb44148e40b9b34
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri May 20 14:41:14 2016 -0700

    Remove special L2 cache case for Knights Landing
    
    L2 cache is shared by 2 cores on Knights Landing, which has 4 threads
    per core:
    
    https://en.wikipedia.org/wiki/Xeon_Phi#Knights_Landing
    
    So L2 cache is shared by 8 threads on Knights Landing as reported by
    CPUID.  We should remove special L2 cache case for Knights Landing.
    
    	[BZ #18185]
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Don't limit threads
    	sharing L2 cache to 2 for Knights Landing.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 020d3fd..182426b 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -573,8 +573,6 @@ init_cacheinfo (void)
 		{
 		  switch (model)
 		    {
-		    case 0x57:
-		      /* Knights Landing has L2 cache shared by 2 cores.  */
 		    case 0x37:
 		    case 0x4a:
 		    case 0x4d:

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=756fa392db3930cec9330169aef2c647df365432

commit 756fa392db3930cec9330169aef2c647df365432
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu May 19 10:02:36 2016 -0700

    Correct Intel processor level type mask from CPUID
    
    Intel CPUID with EAX == 11 returns:
    
    ECX Bits 07 - 00: Level number. Same value in ECX input.
        Bits 15 - 08: Level type.
        ^^^^^^^^^^^^^^^^^^^^^^^^ This is level type.
        Bits 31 - 16: Reserved.
    
    Intel processor level type mask should be 0xff00, not 0xff0.
    
    	[BZ #20119]
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Correct Intel
    	processor level type mask for CPUID with EAX == 11.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 1f46d9d..020d3fd 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -552,7 +552,7 @@ init_cacheinfo (void)
 		      __cpuid_count (11, i++, eax, ebx, ecx, edx);
 
 		      int shipped = ebx & 0xff;
-		      int type = ecx & 0xff0;
+		      int type = ecx & 0xff00;
 		      if (shipped == 0 || type == 0)
 			break;
 		      else if (type == 0x200)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4c197f7a6b4ae1d57d1ad154ddae9db95b0abe86

commit 4c197f7a6b4ae1d57d1ad154ddae9db95b0abe86
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu May 19 09:09:00 2016 -0700

    Check the HTT bit before counting logical threads
    
    Skip counting logical threads for Intel processors if the HTT bit is 0
    which indicates there is only a single logical processor.
    
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
    	logical threads if the HTT bit is 0.
    	* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
    	(index_cpu_HTT): Likewise.
    	(reg_HTT): Likewise.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 8408624..1f46d9d 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -506,99 +506,105 @@ init_cacheinfo (void)
 	  shared = core;
 	}
 
-      /* Figure out the number of logical threads that share the
-	 highest cache level.  */
-      if (max_cpuid >= 4)
+      /* A value of 0 for the HTT bit indicates there is only a single
+	 logical processor.  */
+      if (HAS_CPU_FEATURE (HTT))
 	{
-	  unsigned int family = GLRO(dl_x86_cpu_features).family;
-	  unsigned int model = GLRO(dl_x86_cpu_features).model;
+	  /* Figure out the number of logical threads that share the
+	     highest cache level.  */
+	  if (max_cpuid >= 4)
+	    {
+	      unsigned int family = GLRO(dl_x86_cpu_features).family;
+	      unsigned int model = GLRO(dl_x86_cpu_features).model;
 
-	  int i = 0;
+	      int i = 0;
 
-	  /* Query until desired cache level is enumerated.  */
-	  do
-	    {
-	      __cpuid_count (4, i++, eax, ebx, ecx, edx);
-
-	      /* There seems to be a bug in at least some Pentium Ds
-		 which sometimes fail to iterate all cache parameters.
-		 Do not loop indefinitely here, stop in this case and
-		 assume there is no such information.  */
-	      if ((eax & 0x1f) == 0)
-		goto intel_bug_no_cache_info;
-	    }
-	  while (((eax >> 5) & 0x7) != level);
+	      /* Query until desired cache level is enumerated.  */
+	      do
+		{
+		  __cpuid_count (4, i++, eax, ebx, ecx, edx);
+
+		  /* There seems to be a bug in at least some Pentium Ds
+		     which sometimes fail to iterate all cache parameters.
+		     Do not loop indefinitely here, stop in this case and
+		     assume there is no such information.  */
+		  if ((eax & 0x1f) == 0)
+		    goto intel_bug_no_cache_info;
+		}
+	      while (((eax >> 5) & 0x7) != level);
 
-	  /* Check if cache is inclusive of lower cache levels.  */
-	  inclusive_cache = (edx & 0x2) != 0;
+	      /* Check if cache is inclusive of lower cache levels.  */
+	      inclusive_cache = (edx & 0x2) != 0;
 
-	  threads = (eax >> 14) & 0x3ff;
+	      threads = (eax >> 14) & 0x3ff;
 
-	  /* If max_cpuid >= 11, THREADS is the maximum number of
-	      addressable IDs for logical processors sharing the
-	      cache, instead of the maximum number of threads
-	      sharing the cache.  */
-	  if (threads && max_cpuid >= 11)
-	    {
-	      /* Find the number of logical processors shipped in
-		 one core and apply count mask.  */
-	      i = 0;
-	      while (1)
+	      /* If max_cpuid >= 11, THREADS is the maximum number of
+		 addressable IDs for logical processors sharing the
+		 cache, instead of the maximum number of threads
+		 sharing the cache.  */
+	      if (threads && max_cpuid >= 11)
 		{
-		  __cpuid_count (11, i++, eax, ebx, ecx, edx);
-
-		  int shipped = ebx & 0xff;
-		  int type = ecx & 0xff0;
-		  if (shipped == 0 || type == 0)
-		    break;
-		  else if (type == 0x200)
+		  /* Find the number of logical processors shipped in
+		     one core and apply count mask.  */
+		  i = 0;
+		  while (1)
 		    {
-		      int count_mask;
-
-		      /* Compute count mask.  */
-		      asm ("bsr %1, %0"
-			   : "=r" (count_mask) : "g" (threads));
-		      count_mask = ~(-1 << (count_mask + 1));
-		      threads = (shipped - 1) & count_mask;
+		      __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+		      int shipped = ebx & 0xff;
+		      int type = ecx & 0xff0;
+		      if (shipped == 0 || type == 0)
+			break;
+		      else if (type == 0x200)
+			{
+			  int count_mask;
+
+			  /* Compute count mask.  */
+			  asm ("bsr %1, %0"
+			       : "=r" (count_mask) : "g" (threads));
+			  count_mask = ~(-1 << (count_mask + 1));
+			  threads = (shipped - 1) & count_mask;
+			  break;
+			}
+		    }
+		}
+	      threads += 1;
+	      if (threads > 2 && level == 2 && family == 6)
+		{
+		  switch (model)
+		    {
+		    case 0x57:
+		      /* Knights Landing has L2 cache shared by 2 cores.  */
+		    case 0x37:
+		    case 0x4a:
+		    case 0x4d:
+		    case 0x5a:
+		    case 0x5d:
+		      /* Silvermont has L2 cache shared by 2 cores.  */
+		      threads = 2;
+		      break;
+		    default:
 		      break;
 		    }
 		}
 	    }
-	  threads += 1;
-	  if (threads > 2 && level == 2 && family == 6)
+	  else
 	    {
-	      switch (model)
-		{
-		case 0x57:
-		  /* Knights Landing has L2 cache shared by 2 cores.  */
-		case 0x37:
-		case 0x4a:
-		case 0x4d:
-		case 0x5a:
-		case 0x5d:
-		  /* Silvermont has L2 cache shared by 2 cores.  */
-		  threads = 2;
-		  break;
-		default:
-		  break;
-		}
+intel_bug_no_cache_info:
+	      /* Assume that all logical threads share the highest cache
+		 level.  */
+
+	      threads
+		= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+		    >> 16) & 0xff);
 	    }
-	}
-      else
-	{
-	intel_bug_no_cache_info:
-	  /* Assume that all logical threads share the highest cache level.  */
 
-	  threads
-	    = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
-		>> 16) & 0xff);
+	  /* Cap usage of highest cache level to the number of supported
+	     threads.  */
+	  if (shared > 0 && threads > 0)
+	    shared /= threads;
 	}
 
-      /* Cap usage of highest cache level to the number of supported
-	 threads.  */
-      if (shared > 0 && threads > 0)
-	shared /= threads;
-
       /* Account for non-inclusive L2 and L3 caches.  */
       if (level == 3 && !inclusive_cache)
 	shared += core;
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 9529d61..2bd9371 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -51,6 +51,7 @@
 #define bit_cpu_POPCOUNT	(1 << 23)
 #define bit_cpu_FMA		(1 << 12)
 #define bit_cpu_FMA4		(1 << 16)
+#define bit_cpu_HTT		(1 << 28)
 
 /* COMMON_CPUID_INDEX_7.  */
 #define bit_cpu_ERMS		(1 << 9)
@@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
 # define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
 # define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
+# define index_cpu_HTT		COMMON_CPUID_INDEX_1
 
 # define reg_CX8		edx
 # define reg_CMOV		edx
@@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define reg_FMA4		ecx
 # define reg_POPCOUNT		ecx
 # define reg_OSXSAVE		ecx
+# define reg_HTT		edx
 
 # define index_arch_Fast_Rep_String	FEATURE_INDEX_1
 # define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5531ff0e927969eaf2f8fab12912e1a3a023ebc9

commit 5531ff0e927969eaf2f8fab12912e1a3a023ebc9
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu May 19 08:49:45 2016 -0700

    Remove alignments on jump targets in memset
    
    X86-64 memset-vec-unaligned-erms.S aligns many jump targets, which
    increases code sizes, but not necessarily improve performance.  As
    memset benchtest data of align vs no align on various Intel and AMD
    processors
    
    https://sourceware.org/bugzilla/attachment.cgi?id=9277
    
    shows that aligning jump targets isn't necessary.
    
    	[BZ #20115]
    	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (__memset):
    	Remove alignments on jump targets.

diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 89f6886..28e71fd 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -18,12 +18,10 @@
 
 /* memset is implemented as:
    1. Use overlapping store to avoid branch.
-   2. Force 32-bit displacement for branches to avoid long nop between
-      instructions.
-   3. If size is less than VEC, use integer register stores.
-   4. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
-   5. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
-   6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+   2. If size is less than VEC, use integer register stores.
+   3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
+   4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
+   5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
       4 VEC stores and store 4 * VEC at a time until done.  */
 
 #include <sysdep.h>
@@ -143,14 +141,10 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
 	VZEROUPPER
 	ret
 
-	.p2align 4
 L(stosb_more_2x_vec):
 	cmpq	$REP_STOSB_THRESHOLD, %rdx
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	ja.d32	L(stosb)
+	ja	L(stosb)
 #endif
-	.p2align 4
 L(more_2x_vec):
 	cmpq  $(VEC_SIZE * 4), %rdx
 	ja	L(loop_start)
@@ -162,26 +156,12 @@ L(return):
 	VZEROUPPER
 	ret
 
-	.p2align 4
 L(loop_start):
 	leaq	(VEC_SIZE * 4)(%rdi), %rcx
-# if VEC_SIZE == 32 || VEC_SIZE == 64
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	VMOVU.d32 %VEC(0), (%rdi)
-# else
 	VMOVU	%VEC(0), (%rdi)
-# endif
 	andq	$-(VEC_SIZE * 4), %rcx
-# if VEC_SIZE == 32
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	VMOVU.d32 %VEC(0), -VEC_SIZE(%rdi,%rdx)
-	VMOVU.d32 %VEC(0), VEC_SIZE(%rdi)
-# else
 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
 	VMOVU	%VEC(0), VEC_SIZE(%rdi)
-# endif
 	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
 	VMOVU	%VEC(0), (VEC_SIZE * 2)(%rdi)
 	VMOVU	%VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
@@ -190,14 +170,7 @@ L(loop_start):
 	addq	%rdi, %rdx
 	andq	$-(VEC_SIZE * 4), %rdx
 	cmpq	%rdx, %rcx
-# if VEC_SIZE == 32 || VEC_SIZE == 64
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	je.d32	L(return)
-# else
 	je	L(return)
-# endif
-	.p2align 4
 L(loop):
 	VMOVA	%VEC(0), (%rcx)
 	VMOVA	%VEC(0), VEC_SIZE(%rcx)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2af76a7db8bc8eafccf3c78ec503283aa5249277

commit 2af76a7db8bc8eafccf3c78ec503283aa5249277
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri May 13 08:29:22 2016 -0700

    Call init_cpu_features only if SHARED is defined
    
    In static executable, since init_cpu_features is called early from
    __libc_start_main, there is no need to call it again in dl_platform_init.
    
    	[BZ #20072]
    	* sysdeps/i386/dl-machine.h (dl_platform_init): Call
    	init_cpu_features only if SHARED is defined.
    	* sysdeps/x86_64/dl-machine.h (dl_platform_init): Likewise.

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 130bcf5..4e3968a 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -237,7 +237,11 @@ dl_platform_init (void)
     /* Avoid an empty string which would disturb us.  */
     GLRO(dl_platform) = NULL;
 
+#ifdef SHARED
+  /* init_cpu_features has been called early from __libc_start_main in
+     static executable.  */
   init_cpu_features (&GLRO(dl_x86_cpu_features));
+#endif
 }
 
 static inline Elf32_Addr
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 980ca73..ed0c1a8 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -224,7 +224,11 @@ dl_platform_init (void)
     /* Avoid an empty string which would disturb us.  */
     GLRO(dl_platform) = NULL;
 
+#ifdef SHARED
+  /* init_cpu_features has been called early from __libc_start_main in
+     static executable.  */
   init_cpu_features (&GLRO(dl_x86_cpu_features));
+#endif
 }
 
 static inline ElfW(Addr)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9c7a58e7f94cae195e85003b21c6bb7d2a0ec9a5

commit 9c7a58e7f94cae195e85003b21c6bb7d2a0ec9a5
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri May 13 07:18:25 2016 -0700

    Support non-inclusive caches on Intel processors
    
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Check and support
    	non-inclusive caches on Intel processors.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 143b333..8408624 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -492,6 +492,9 @@ init_cacheinfo (void)
     {
       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
 
+      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+      bool inclusive_cache = true;
+
       /* Try L3 first.  */
       level  = 3;
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
@@ -500,7 +503,7 @@ init_cacheinfo (void)
 	{
 	  /* Try L2 otherwise.  */
 	  level  = 2;
-	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+	  shared = core;
 	}
 
       /* Figure out the number of logical threads that share the
@@ -526,6 +529,9 @@ init_cacheinfo (void)
 	    }
 	  while (((eax >> 5) & 0x7) != level);
 
+	  /* Check if cache is inclusive of lower cache levels.  */
+	  inclusive_cache = (edx & 0x2) != 0;
+
 	  threads = (eax >> 14) & 0x3ff;
 
 	  /* If max_cpuid >= 11, THREADS is the maximum number of
@@ -592,6 +598,10 @@ init_cacheinfo (void)
 	 threads.  */
       if (shared > 0 && threads > 0)
 	shared /= threads;
+
+      /* Account for non-inclusive L2 and L3 caches.  */
+      if (level == 3 && !inclusive_cache)
+	shared += core;
     }
   /* This spells out "AuthenticAMD".  */
   else if (is_amd)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9faebb60d146ade176ca5ad63e34ce67bacbec21

commit 9faebb60d146ade176ca5ad63e34ce67bacbec21
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed May 11 05:49:09 2016 -0700

    Remove x86 ifunc-defines.sym and rtld-global-offsets.sym
    
    Merge x86 ifunc-defines.sym with x86 cpu-features-offsets.sym.  Remove
    x86 ifunc-defines.sym and rtld-global-offsets.sym.  No code changes on
    i686 and x86-64.
    
    	* sysdeps/i386/i686/multiarch/Makefile (gen-as-const-headers):
    	Remove ifunc-defines.sym.
    	* sysdeps/x86_64/multiarch/Makefile (gen-as-const-headers):
    	Likewise.
    	* sysdeps/i386/i686/multiarch/ifunc-defines.sym: Removed.
    	* sysdeps/x86/rtld-global-offsets.sym: Likewise.
    	* sysdeps/x86_64/multiarch/ifunc-defines.sym: Likewise.
    	* sysdeps/x86/Makefile (gen-as-const-headers): Remove
    	rtld-global-offsets.sym.
    	* sysdeps/x86_64/multiarch/ifunc-defines.sym: Merged with ...
    	* sysdeps/x86/cpu-features-offsets.sym: This.
    	* sysdeps/x86/cpu-features.h: Include <cpu-features-offsets.h>
    	instead of <ifunc-defines.h> and <rtld-global-offsets.h>.

diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 700010d..4a0c20c 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -1,6 +1,5 @@
 ifeq ($(subdir),csu)
 tests += test-multiarch
-gen-as-const-headers += ifunc-defines.sym
 endif
 
 ifeq ($(subdir),string)
diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
deleted file mode 100644
index 96e9cfa..0000000
--- a/sysdeps/i386/i686/multiarch/ifunc-defines.sym
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "init-arch.h"
-#include <stddef.h>
-
---
-
-CPU_FEATURES_SIZE	sizeof (struct cpu_features)
-CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
-CPUID_SIZE		sizeof (struct cpuid_registers)
-CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
-CPUID_EBX_OFFSET	offsetof (struct cpuid_registers, ebx)
-CPUID_ECX_OFFSET	offsetof (struct cpuid_registers, ecx)
-CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
-FAMILY_OFFSET		offsetof (struct cpu_features, family)
-MODEL_OFFSET		offsetof (struct cpu_features, model)
-FEATURE_OFFSET		offsetof (struct cpu_features, feature)
-FEATURE_SIZE		sizeof (unsigned int)
-
-COMMON_CPUID_INDEX_1
-FEATURE_INDEX_1
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 0de4f42..0d0326c 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -1,5 +1,5 @@
 ifeq ($(subdir),csu)
-gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym
+gen-as-const-headers += cpu-features-offsets.sym
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index a9d53d1..f6739fa 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -5,3 +5,19 @@
 #define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
 
 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
+
+CPU_FEATURES_SIZE	sizeof (struct cpu_features)
+CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
+CPUID_SIZE		sizeof (struct cpuid_registers)
+CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
+CPUID_EBX_OFFSET	offsetof (struct cpuid_registers, ebx)
+CPUID_ECX_OFFSET	offsetof (struct cpuid_registers, ecx)
+CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
+FAMILY_OFFSET		offsetof (struct cpu_features, family)
+MODEL_OFFSET		offsetof (struct cpu_features, model)
+FEATURE_OFFSET		offsetof (struct cpu_features, feature)
+FEATURE_SIZE		sizeof (unsigned int)
+
+COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
+FEATURE_INDEX_1
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 8f946c4..9529d61 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -74,8 +74,7 @@
 
 #ifdef	__ASSEMBLER__
 
-# include <ifunc-defines.h>
-# include <rtld-global-offsets.h>
+# include <cpu-features-offsets.h>
 
 # define index_cpu_CX8	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
 # define index_cpu_CMOV	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym
deleted file mode 100644
index a9d53d1..0000000
--- a/sysdeps/x86/rtld-global-offsets.sym
+++ /dev/null
@@ -1,7 +0,0 @@
-#define SHARED 1
-
-#include <ldsodefs.h>
-
-#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
-
-RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d6636cb..3736f54 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,6 +1,5 @@
 ifeq ($(subdir),csu)
 tests += test-multiarch
-gen-as-const-headers += ifunc-defines.sym
 endif
 
 ifeq ($(subdir),string)
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
deleted file mode 100644
index 3df946f..0000000
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "init-arch.h"
-#include <stddef.h>
-
---
-
-CPU_FEATURES_SIZE	sizeof (struct cpu_features)
-CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
-CPUID_SIZE		sizeof (struct cpuid_registers)
-CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
-CPUID_EBX_OFFSET	offsetof (struct cpuid_registers, ebx)
-CPUID_ECX_OFFSET	offsetof (struct cpuid_registers, ecx)
-CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
-FAMILY_OFFSET		offsetof (struct cpu_features, family)
-MODEL_OFFSET		offsetof (struct cpu_features, model)
-FEATURE_OFFSET		offsetof (struct cpu_features, feature)
-FEATURE_SIZE		sizeof (unsigned int)
-
-COMMON_CPUID_INDEX_1
-COMMON_CPUID_INDEX_7
-FEATURE_INDEX_1

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a29574292ac84a9882553aee676f0f5c73b064ed

commit a29574292ac84a9882553aee676f0f5c73b064ed
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun May 8 08:49:02 2016 -0700

    Move sysdeps/x86_64/cacheinfo.c to sysdeps/x86
    
    Move sysdeps/x86_64/cacheinfo.c to sysdeps/x86.  No code changes on x86
    and x86_64.
    
    	* sysdeps/i386/cacheinfo.c: Include <sysdeps/x86/cacheinfo.c>
    	instead of <sysdeps/x86_64/cacheinfo.c>.
    	* sysdeps/x86_64/cacheinfo.c: Moved to ...
    	* sysdeps/x86/cacheinfo.c: Here.

diff --git a/sysdeps/i386/cacheinfo.c b/sysdeps/i386/cacheinfo.c
index 0b50c6d..f15fe07 100644
--- a/sysdeps/i386/cacheinfo.c
+++ b/sysdeps/i386/cacheinfo.c
@@ -1,3 +1,3 @@
 #define DISABLE_PREFETCHW
 
-#include <sysdeps/x86_64/cacheinfo.c>
+#include <sysdeps/x86/cacheinfo.c>
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86/cacheinfo.c
similarity index 100%
rename from sysdeps/x86_64/cacheinfo.c
rename to sysdeps/x86/cacheinfo.c

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=601a5ed0cfe7ec825664ccd8112918f879eeeebc

commit 601a5ed0cfe7ec825664ccd8112918f879eeeebc
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Apr 15 05:22:53 2016 -0700

    Detect Intel Goldmont and Airmont processors
    
    Updated from the model numbers of Goldmont and Airmont processors in
    Intel64 And IA-32 Processor Architectures Software Developer's Manual
    Volume 3 Revision 058.
    
    	* sysdeps/x86/cpu-features.c (init_cpu_features): Detect Intel
    	Goldmont and Airmont processors.

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 963b845..a5fa81f 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -140,6 +140,14 @@ init_cpu_features (struct cpu_features *cpu_features)
 	      cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
 		|= bit_arch_Prefer_No_VZEROUPPER;
 
+	    case 0x5c:
+	    case 0x5f:
+	      /* Unaligned load versions are faster than SSSE3
+		 on Goldmont.  */
+
+	    case 0x4c:
+	      /* Airmont is a die shrink of Silvermont.  */
+
 	    case 0x37:
 	    case 0x4a:
 	    case 0x4d:

-----------------------------------------------------------------------

Summary of changes:
 sysdeps/i386/cacheinfo.c                           |    2 +-
 sysdeps/i386/dl-machine.h                          |    4 +
 sysdeps/i386/i686/multiarch/Makefile               |    1 -
 sysdeps/i386/i686/multiarch/ifunc-defines.sym      |   19 --
 sysdeps/x86/Makefile                               |    2 +-
 sysdeps/{x86_64 => x86}/cacheinfo.c                |  244 ++++++++++++++------
 sysdeps/x86/cpu-features-offsets.sym               |   16 ++
 sysdeps/x86/cpu-features.c                         |    8 +
 sysdeps/x86/cpu-features.h                         |    6 +-
 sysdeps/x86/rtld-global-offsets.sym                |    7 -
 sysdeps/x86_64/dl-machine.h                        |    4 +
 sysdeps/x86_64/multiarch/Makefile                  |    1 -
 sysdeps/x86_64/multiarch/ifunc-defines.sym         |   20 --
 .../x86_64/multiarch/memset-vec-unaligned-erms.S   |   37 +---
 14 files changed, 213 insertions(+), 158 deletions(-)
 delete mode 100644 sysdeps/i386/i686/multiarch/ifunc-defines.sym
 rename sysdeps/{x86_64 => x86}/cacheinfo.c (79%)
 delete mode 100644 sysdeps/x86/rtld-global-offsets.sym
 delete mode 100644 sysdeps/x86_64/multiarch/ifunc-defines.sym


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]