This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


GNU C Library master sources branch master updated. glibc-2.21-474-gc71c89e


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  c71c89e5c72baf43fd44d08dda8ab846eec5b1d6 (commit)
       via  08325735c2efb0257b8c07ac0ff91e44c27ecbf8 (commit)
      from  2a8c2c7b335ed07f63c246077fa672d8eaed23e4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c71c89e5c72baf43fd44d08dda8ab846eec5b1d6

commit c71c89e5c72baf43fd44d08dda8ab846eec5b1d6
Author: Szabolcs Nagy <nsz@port70.net>
Date:   Wed Jun 17 12:44:53 2015 +0100

    [AArch64] Fix cfi_adjust_cfa_offset usage in dl-tlsdesc.S
    
    Some of the cfi annotations used an incorrect sign.
    
    	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Fix
    	cfi_adjust_cfa_offset argument.
    	(_dl_tlsdesc_undefweak, _dl_tlsdesc_dynamic): Likewise.
    	(_dl_tlsdesc_resolve_rela, _dl_tlsdesc_resolve_hold): Likewise.

diff --git a/ChangeLog b/ChangeLog
index 1d4a376..aedec5a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2015-06-17  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Fix
+	cfi_adjust_cfa_offset argument.
+	(_dl_tlsdesc_undefweak, _dl_tlsdesc_dynamic): Likewise.
+	(_dl_tlsdesc_resolve_rela, _dl_tlsdesc_resolve_hold): Likewise.
+
 	[BZ #18034]
 	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
 	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index c7adf79..64719eb 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -118,7 +118,7 @@ _dl_tlsdesc_return_lazy:
 	.align  2
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
-	cfi_adjust_cfa_offset(16)
+	cfi_adjust_cfa_offset (16)
 	/* The ldar here happens after the load from [x0] at the call site
 	   (that is generated by the compiler as part of the TLS access ABI),
 	   so it reads the same value (this function is the final value of
@@ -130,7 +130,7 @@ _dl_tlsdesc_undefweak:
 	mrs	x1, tpidr_el0
 	sub	x0, x0, x1
 	ldr	x1, [sp], #16
-	cfi_adjust_cfa_offset(16)
+	cfi_adjust_cfa_offset (-16)
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
@@ -208,7 +208,7 @@ _dl_tlsdesc_dynamic:
 	ldp	 x3,  x4, [sp, #32+16*1]
 
 	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
 # undef NSAVEXREGPAIRS
 	RET
 2:
@@ -297,7 +297,7 @@ _dl_tlsdesc_resolve_rela:
 	ldp	x15, x16, [sp, #32+16*6]
 	ldp	x17, x18, [sp, #32+16*7]
 	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (-32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
 	ldp	x2, x3, [sp], #16
 	cfi_adjust_cfa_offset (-16)
 	RET
@@ -359,7 +359,7 @@ _dl_tlsdesc_resolve_hold:
 	ldp	x15, x16, [sp, #32+16*7]
 	ldp	x17, x18, [sp, #32+16*8]
 	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (-32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=08325735c2efb0257b8c07ac0ff91e44c27ecbf8

commit 08325735c2efb0257b8c07ac0ff91e44c27ecbf8
Author: Szabolcs Nagy <nsz@port70.net>
Date:   Wed Jun 17 12:37:49 2015 +0100

    [BZ 18034][AArch64] Lazy TLSDESC relocation data race fix
    
    Lazy TLSDESC initialization needs to be synchronized with concurrent TLS
    accesses.  The TLS descriptor contains a function pointer (entry) and an
    argument that is accessed from the entry function.  With lazy initialization
    the first call to the entry function updates the entry and the argument to
    their final value.  A final entry function must make sure that it
    accesses an initialized argument; this needs synchronization on systems
    with weak memory ordering, otherwise the writes of the first call can be
    observed out of order.
    
    There are at least two issues with the current code:
    
    tlsdesc.c (i386, x86_64, arm, aarch64) uses volatile memory accesses on the
    write side (in the initial entry function) instead of C11 atomics.
    
    And on systems with weak memory ordering (arm, aarch64) the read side
    synchronization is missing from the final entry functions (dl-tlsdesc.S).
    
    This patch only deals with aarch64.
    
    * Write side:
    
    Volatile accesses were replaced with C11 relaxed atomics, and a release
    store was used for the initialization of entry so the read side can
    synchronize with it.
    
    * Read side:
    
    The TLS access code generated by the compiler and an entry function are
    roughly:
    
      ldr x1, [x0]    // load the entry
      blr x1          // call it

    entryfunc:
      ldr x0, [x0,#8] // load the arg
      ret
    
    Various alternatives were considered to force the ordering in the entry
    function between the two loads:
    
    (1) barrier
    
    entryfunc:
      dmb ishld
      ldr x0, [x0,#8]
    
    (2) address dependency (if the address of the second load depends on the
    result of the first one, the ordering is guaranteed):
    
    entryfunc:
      ldr x1,[x0]
      and x1,x1,#8
      orr x1,x1,#8
      ldr x0,[x0,x1]
    
    (3) load-acquire (ARMv8 instruction that is ordered before subsequent
    loads and stores)
    
    entryfunc:
      ldar xzr,[x0]
      ldr x0,[x0,#8]
    
    Option (1) is the simplest but slowest (note: this runs at every TLS
    access).  Options (2) and (3) do one extra load from [x0] (same-address
    loads are ordered, so it happens-after the load at the call site).
    Option (2) clobbers x1, which is problematic because existing gcc does
    not expect that, so approach (3) was chosen.
    
    A new _dl_tlsdesc_return_lazy entry function was introduced for lazily
    relocated static TLS, so non-lazy static TLS can avoid the synchronization
    cost.
    
    	[BZ #18034]
    	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
    	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
    	(_dl_tlsdesc_undefweak): Guarantee TLSDESC entry and argument load-load
    	ordering using ldar.
    	(_dl_tlsdesc_dynamic): Likewise.
    	(_dl_tlsdesc_return_lazy): Likewise.
    	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Use
    	relaxed atomics instead of volatile and synchronize with release store.
    	(_dl_tlsdesc_resolve_hold_fixup): Use relaxed atomics instead of
    	volatile.
    	* elf/tlsdeschtab.h (_dl_tlsdesc_resolve_early_return_p): Likewise.
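
The scheme above can be summarized in portable C11, independently of the
AArch64 details.  The sketch below is illustrative only and is not glibc
code: struct desc, lazy_resolver and final_entry are invented names, and
only the entry/arg pair mirrors the real struct tlsdesc.  The resolver
publishes the argument with a relaxed store and then the entry pointer with
a release store; a lazily relocated entry function performs an acquire load
of the entry (the job the ldar does in dl-tlsdesc.S) before loading the
argument, so it cannot observe a stale argument.

  /* Illustrative sketch only, not glibc code: a lazily initialized
     descriptor published with C11 atomics.  Build with: cc -std=c11 lazy.c  */
  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  struct desc
  {
    void *(*_Atomic entry) (struct desc *);  /* plays the role of td->entry */
    _Atomic uintptr_t arg;                   /* plays the role of td->arg */
  };

  /* Final entry function: must not read ARG before it is initialized.  */
  static void *
  final_entry (struct desc *d)
  {
    /* The acquire load of ENTRY stands in for the ldar in
       _dl_tlsdesc_return_lazy: it synchronizes with the release store in
       the resolver, so the load of ARG below sees the initialized value.  */
    (void) atomic_load_explicit (&d->entry, memory_order_acquire);
    return (void *) atomic_load_explicit (&d->arg, memory_order_relaxed);
  }

  /* Initial entry function: initializes the descriptor on first use.  */
  static void *
  lazy_resolver (struct desc *d)
  {
    /* Write the argument first, with a relaxed store...  */
    atomic_store_explicit (&d->arg, (uintptr_t) 42, memory_order_relaxed);
    /* ...then publish the final entry with a release store, so a thread
       that observes FINAL_ENTRY also observes ARG == 42.  */
    atomic_store_explicit (&d->entry, final_entry, memory_order_release);
    return final_entry (d);
  }

  int
  main (void)
  {
    struct desc d;
    atomic_init (&d.entry, lazy_resolver);
    atomic_init (&d.arg, 0);

    /* A TLS access amounts to "call d.entry (&d)": the first call runs the
       resolver, later calls run final_entry directly.  */
    void *(*e) (struct desc *) = atomic_load_explicit (&d.entry,
                                                       memory_order_relaxed);
    printf ("first access: %p\n", e (&d));
    e = atomic_load_explicit (&d.entry, memory_order_relaxed);
    printf ("later access: %p\n", e (&d));
    return 0;
  }

In the actual implementation the stores additionally happen under
GL(dl_load_lock), and _dl_tlsdesc_resolve_early_return_p makes concurrent
callers retry, which is why the remaining accesses can use relaxed order.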

diff --git a/ChangeLog b/ChangeLog
index 5e93d9e..1d4a376 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2015-06-17  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
+	[BZ #18034]
+	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
+	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
+	(_dl_tlsdesc_undefweak): Guarantee TLSDESC entry and argument load-load
+	ordering using ldar.
+	(_dl_tlsdesc_dynamic): Likewise.
+	(_dl_tlsdesc_return_lazy): Likewise.
+	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Use
+	relaxed atomics instead of volatile and synchronize with release store.
+	(_dl_tlsdesc_resolve_hold_fixup): Use relaxed atomics instead of
+	volatile.
+	* elf/tlsdeschtab.h (_dl_tlsdesc_resolve_early_return_p): Likewise.
+
 2015-06-15  Andrew Senkevich  <andrew.senkevich@intel.com>
 
 	* sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New symbols added.
diff --git a/NEWS b/NEWS
index 33cba7b..b215276 100644
--- a/NEWS
+++ b/NEWS
@@ -15,13 +15,13 @@ Version 2.22
   17581, 17588, 17596, 17620, 17621, 17628, 17631, 17692, 17711, 17715,
   17776, 17779, 17792, 17836, 17912, 17916, 17930, 17932, 17944, 17949,
   17964, 17965, 17967, 17969, 17978, 17987, 17991, 17996, 17998, 17999,
-  18007, 18019, 18020, 18029, 18030, 18032, 18036, 18038, 18039, 18042,
-  18043, 18046, 18047, 18049, 18068, 18080, 18093, 18100, 18104, 18110,
-  18111, 18116, 18125, 18128, 18138, 18185, 18196, 18197, 18206, 18210,
-  18211, 18217, 18220, 18221, 18234, 18244, 18247, 18287, 18319, 18324,
-  18333, 18346, 18397, 18409, 18410, 18412, 18418, 18422, 18434, 18444,
-  18468, 18469, 18470, 18479, 18483, 18495, 18496, 18497, 18498, 18507,
-  18512, 18519, 18520, 18522, 18527, 18528, 18529, 18530.
+  18007, 18019, 18020, 18029, 18030, 18032, 18034, 18036, 18038, 18039,
+  18042, 18043, 18046, 18047, 18049, 18068, 18080, 18093, 18100, 18104,
+  18110, 18111, 18116, 18125, 18128, 18138, 18185, 18196, 18197, 18206,
+  18210, 18211, 18217, 18220, 18221, 18234, 18244, 18247, 18287, 18319,
+  18324, 18333, 18346, 18397, 18409, 18410, 18412, 18418, 18422, 18434,
+  18444, 18468, 18469, 18470, 18479, 18483, 18495, 18496, 18497, 18498,
+  18507, 18512, 18519, 18520, 18522, 18527, 18528, 18529, 18530.
 
 * Cache information can be queried via sysconf() function on s390 e.g. with
   _SC_LEVEL1_ICACHE_SIZE as argument.
diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h
index d13b4e5..fb0eb88 100644
--- a/elf/tlsdeschtab.h
+++ b/elf/tlsdeschtab.h
@@ -20,6 +20,8 @@
 #ifndef TLSDESCHTAB_H
 # define TLSDESCHTAB_H 1
 
+#include <atomic.h>
+
 # ifdef SHARED
 
 #  include <inline-hashtab.h>
@@ -138,17 +140,17 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
 static int
 _dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
 {
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     return 1;
 
   __rtld_lock_lock_recursive (GL(dl_load_lock));
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     {
       __rtld_lock_unlock_recursive (GL(dl_load_lock));
       return 1;
     }
 
-  td->entry = _dl_tlsdesc_resolve_hold;
+  atomic_store_relaxed (&td->entry, _dl_tlsdesc_resolve_hold);
 
   return 0;
 }
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index be9b9b3..c7adf79 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -79,6 +79,29 @@ _dl_tlsdesc_return:
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
 	/* Handler for undefined weak TLS symbols.
 	   Prototype:
 	   _dl_tlsdesc_undefweak (tlsdesc *);
@@ -96,6 +119,13 @@ _dl_tlsdesc_return:
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset(16)
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x0, [x0, #8]
 	mrs	x1, tpidr_el0
 	sub	x0, x0, x1
@@ -152,6 +182,13 @@ _dl_tlsdesc_dynamic:
 	stp	x3,  x4, [sp, #32+16*1]
 
 	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
 	ldr	x1, [x0,#8]
 	ldr	x0, [x4]
 	ldr	x3, [x1,#16]
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 7a1285e..e6c0078 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,6 +46,9 @@ extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_return (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return_lazy (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_undefweak (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4821f8c..9f3ff9b 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -25,6 +25,7 @@
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
 #include <tlsdeschtab.h>
+#include <atomic.h>
 
 /* The following functions take an entry_check_offset argument.  It's
    computed by the caller as an offset between its entry point and the
@@ -39,11 +40,15 @@
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
-				struct link_map *l)
+_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
 {
-  const ElfW(Rela) *reloc = td->arg;
+  const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
 
+  /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
+     initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
+     callers will return and retry calling td->entry.  The updated td->entry
+     synchronizes with the single writer so all read accesses here can use
+     relaxed order.  */
   if (_dl_tlsdesc_resolve_early_return_p
       (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
     return;
@@ -86,8 +91,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
   if (!sym)
     {
-      td->arg = (void*) reloc->r_addend;
-      td->entry = _dl_tlsdesc_undefweak;
+      atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
+      /* This release store synchronizes with the ldar acquire load
+	 instruction in _dl_tlsdesc_undefweak.  */
+      atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
     }
   else
     {
@@ -96,16 +103,22 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 #  else
       if (!TRY_STATIC_TLS (l, result))
 	{
-	  td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
+	  void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
 					      + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_dynamic;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_dynamic.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
 	}
       else
 #  endif
 	{
-	  td->arg = (void*) (sym->st_value + result->l_tls_offset
+	  void *p = (void*) (sym->st_value + result->l_tls_offset
 			     + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_return;
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_return_lazy.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
 	}
     }
 
@@ -120,11 +133,10 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 
 void
 attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
-				void *caller)
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
 {
   /* Maybe we're lucky and can return early.  */
-  if (caller != td->entry)
+  if (caller != atomic_load_relaxed (&td->entry))
     return;
 
   /* Locking here will stop execution until the running resolver runs

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                    |   20 +++++++++++++++++
 NEWS                         |   14 ++++++------
 elf/tlsdeschtab.h            |    8 ++++--
 sysdeps/aarch64/dl-tlsdesc.S |   47 +++++++++++++++++++++++++++++++++++++----
 sysdeps/aarch64/dl-tlsdesc.h |    3 ++
 sysdeps/aarch64/tlsdesc.c    |   36 +++++++++++++++++++++----------
 6 files changed, 101 insertions(+), 27 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

