This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.26.9000-716-g659ca26


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  659ca267360e1c1f64eea9205bb81cb5e9049908 (commit)
       via  94d2f0af157d8c02e688a315039f8044aedbcc89 (commit)
       via  28e1ddf340e205cd8e1eff58e92957318c6e9966 (commit)
       via  0ca3d1d6d096e222346c74601d50e9013c8bb25d (commit)
       via  2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52 (commit)
       via  91c5a366d8d398d2fc4542f961c93058a92ade6f (commit)
       via  b7cf203b5c17dd6d9878537d41e0c7cc3d270a67 (commit)
       via  0cc5b022f817eeaa81735ae58717b5dabae92941 (commit)
      from  43ddff2e364c69847f5f698f6a43f9dde328b76a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=659ca267360e1c1f64eea9205bb81cb5e9049908

commit 659ca267360e1c1f64eea9205bb81cb5e9049908
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Tue Oct 24 17:49:14 2017 +0100

    aarch64: optimize _dl_tlsdesc_dynamic fast path
    
    Remove some load/store instructions from the dynamic tlsdesc resolver
    fast path.  This gives around 20% faster tls access in dlopened shared
    libraries (assuming glibc ran out of static tls space).
    
    	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Optimize.

diff --git a/ChangeLog b/ChangeLog
index 2cc266b..e1146ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Optimize.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	* sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
 	DT_TLSDESC_GOT initialization.
 	* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 70550c7..1d2008c 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -142,23 +142,17 @@ _dl_tlsdesc_undefweak:
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_dynamic:
-# define NSAVEXREGPAIRS 2
-	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
-	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
 	DELOUSE (0)
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
-
-	stp	x1,  x2, [sp, #32+16*0]
-	stp	x3,  x4, [sp, #32+16*1]
-	cfi_rel_offset (x1, 32)
-	cfi_rel_offset (x2, 32+8)
-	cfi_rel_offset (x3, 32+16)
-	cfi_rel_offset (x4, 32+24)
+	stp	x1,  x2, [sp, #-32]!
+	stp	x3,  x4, [sp, #16]
+	cfi_adjust_cfa_offset (32)
+	cfi_rel_offset (x1, 0)
+	cfi_rel_offset (x2, 8)
+	cfi_rel_offset (x3, 16)
+	cfi_rel_offset (x4, 24)
 
 	mrs	x4, tpidr_el0
 	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]
@@ -167,23 +161,18 @@ _dl_tlsdesc_dynamic:
 	ldr	PTR_REG (2), [x0,#DTV_COUNTER]
 	cmp	PTR_REG (3), PTR_REG (2)
 	b.hi	2f
-	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]
+	/* Load r2 = td->tlsinfo.ti_module and r3 = td->tlsinfo.ti_offset.  */
+	ldp	PTR_REG (2), PTR_REG (3), [x1,#TLSDESC_MODID]
 	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
 	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
 	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
 	b.eq	2f
-	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]
-	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
-	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
+	sub	PTR_REG (3), PTR_REG (3), PTR_REG (4)
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (3)
 1:
-	ldp	 x1,  x2, [sp, #32+16*0]
-	ldp	 x3,  x4, [sp, #32+16*1]
-
-	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
-	cfi_restore (x29)
-	cfi_restore (x30)
-# undef NSAVEXREGPAIRS
+	ldp	 x3,  x4, [sp, #16]
+	ldp	 x1,  x2, [sp], #32
+	cfi_adjust_cfa_offset (-32)
 	RET
 2:
 	/* This is the slow path. We need to call __tls_get_addr() which
@@ -191,29 +180,33 @@ _dl_tlsdesc_dynamic:
 	   callee will trash.  */
 
 	/* Save the remaining registers that we must treat as caller save.  */
-# define NSAVEXREGPAIRS 7
-	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
+# define NSAVEXREGPAIRS 8
+	stp	x29, x30, [sp,#-16*NSAVEXREGPAIRS]!
 	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
-	stp	 x7,  x8, [sp, #16*1]
-	stp	 x9, x10, [sp, #16*2]
-	stp	x11, x12, [sp, #16*3]
-	stp	x13, x14, [sp, #16*4]
-	stp	x15, x16, [sp, #16*5]
-	stp	x17, x18, [sp, #16*6]
-	cfi_rel_offset (x5, 0)
-	cfi_rel_offset (x6, 8)
-	cfi_rel_offset (x7, 16)
-	cfi_rel_offset (x8, 16+8)
-	cfi_rel_offset (x9, 16*2)
-	cfi_rel_offset (x10, 16*2+8)
-	cfi_rel_offset (x11, 16*3)
-	cfi_rel_offset (x12, 16*3+8)
-	cfi_rel_offset (x13, 16*4)
-	cfi_rel_offset (x14, 16*4+8)
-	cfi_rel_offset (x15, 16*5)
-	cfi_rel_offset (x16, 16*5+8)
-	cfi_rel_offset (x17, 16*6)
-	cfi_rel_offset (x18, 16*6+8)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	stp	 x5,  x6, [sp, #16*1]
+	stp	 x7,  x8, [sp, #16*2]
+	stp	 x9, x10, [sp, #16*3]
+	stp	x11, x12, [sp, #16*4]
+	stp	x13, x14, [sp, #16*5]
+	stp	x15, x16, [sp, #16*6]
+	stp	x17, x18, [sp, #16*7]
+	cfi_rel_offset (x5, 16*1)
+	cfi_rel_offset (x6, 16*1+8)
+	cfi_rel_offset (x7, 16*2)
+	cfi_rel_offset (x8, 16*2+8)
+	cfi_rel_offset (x9, 16*3)
+	cfi_rel_offset (x10, 16*3+8)
+	cfi_rel_offset (x11, 16*4)
+	cfi_rel_offset (x12, 16*4+8)
+	cfi_rel_offset (x13, 16*5)
+	cfi_rel_offset (x14, 16*5+8)
+	cfi_rel_offset (x15, 16*6)
+	cfi_rel_offset (x16, 16*6+8)
+	cfi_rel_offset (x17, 16*7)
+	cfi_rel_offset (x18, 16*7+8)
 
 	SAVE_Q_REGISTERS
 
@@ -225,14 +218,18 @@ _dl_tlsdesc_dynamic:
 
 	RESTORE_Q_REGISTERS
 
-	ldp	 x7,  x8, [sp, #16*1]
-	ldp	 x9, x10, [sp, #16*2]
-	ldp	x11, x12, [sp, #16*3]
-	ldp	x13, x14, [sp, #16*4]
-	ldp	x15, x16, [sp, #16*5]
-	ldp	x17, x18, [sp, #16*6]
-	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
+	ldp	 x5,  x6, [sp, #16*1]
+	ldp	 x7,  x8, [sp, #16*2]
+	ldp	 x9, x10, [sp, #16*3]
+	ldp	x11, x12, [sp, #16*4]
+	ldp	x13, x14, [sp, #16*5]
+	ldp	x15, x16, [sp, #16*6]
+	ldp	x17, x18, [sp, #16*7]
+
+	ldp	x29, x30, [sp], #16*NSAVEXREGPAIRS
 	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+	cfi_restore (x29)
+	cfi_restore (x30)
 	b	1b
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=94d2f0af157d8c02e688a315039f8044aedbcc89

commit 94d2f0af157d8c02e688a315039f8044aedbcc89
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Oct 20 17:53:44 2017 +0100

    arm: Remove lazy tlsdesc initialization related code
    
    Lazy tlsdesc initialization is no longer used in the dynamic linker
    so all related code can be removed.
    
    	* sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
    	DT_TLSDESC_GOT initialization.
    	* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
    	(_dl_tlsdesc_resolve_hold): Likewise.
    	* sysdeps/arm/dl-tlsdesc.h (_dl_tlsdesc_lazy_resolver): Remove.
    	(_dl_tlsdesc_resolve_hold): Likewise.
    	* sysdeps/arm/tlsdesc.c (_dl_tlsdesc_lazy_resolver_fixup): Remove.
    	(_dl_tlsdesc_resolve_hold_fixup): Likewise.

diff --git a/ChangeLog b/ChangeLog
index 9598180..2cc266b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
+	DT_TLSDESC_GOT initialization.
+	* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
+	(_dl_tlsdesc_resolve_hold): Likewise.
+	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_lazy_resolver): Remove.
+	(_dl_tlsdesc_resolve_hold): Likewise.
+	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_lazy_resolver_fixup): Remove.
+	(_dl_tlsdesc_resolve_hold_fixup): Likewise.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	* sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.
 
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index ec3a027..fb5468a 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -127,10 +127,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
     }
 
-  if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
-    *(Elf32_Addr*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
-      = (Elf32_Addr) &_dl_tlsdesc_lazy_resolver;
-
   return lazy;
 }
 
diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
index e7bed02..99d4b83 100644
--- a/sysdeps/arm/dl-tlsdesc.S
+++ b/sysdeps/arm/dl-tlsdesc.S
@@ -132,87 +132,3 @@ _dl_tlsdesc_dynamic:
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
 #endif /* SHARED */
-
-/* lazy resolved for tls descriptors.  */
-	.hidden _dl_tlsdesc_lazy_resolver
-	.global	_dl_tlsdesc_lazy_resolver
-	.type	_dl_tlsdesc_lazy_resolver,#function
-	cfi_startproc
-	eabi_fnstart
-	.align 2
-_dl_tlsdesc_lazy_resolver:
-	/* r0 points at the tlsdesc,
-	   r1 points at the GOT
-	   r2 was pushed by the trampoline and used as a temp,
-	      we need to pop it here.
-	  We push the remaining call-clobbered registers here, and also
-	  R1 -- to keep the stack correctly aligned.  */
-	/* Tell the unwinder that r2 has already been pushed.  */
-	eabi_save ({r2})
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (r2, 0)
-	eabi_save ({r0,r1,r3,ip,lr})
-	push	{r0, r1, r3, ip, lr}
-	cfi_adjust_cfa_offset (20)
-	cfi_rel_offset (r0, 0)
-	cfi_rel_offset (r1, 4)
-	cfi_rel_offset (r3, 8)
-	cfi_rel_offset (ip, 12)
-	cfi_rel_offset (lr, 16)
-	bl	_dl_tlsdesc_lazy_resolver_fixup
-	pop	{r0, r1, r3, ip, lr}
-	cfi_adjust_cfa_offset (-20)
-	cfi_restore (lr)
-	cfi_restore (ip)
-	cfi_restore (r3)
-	cfi_restore (r1)
-	cfi_restore (r0)
-	pop	{r2}
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (r2)
-	ldr	r1, [r0, #4]
-	BX	(r1)
-	eabi_fnend
-	cfi_endproc
-	.size	_dl_tlsdesc_lazy_resolver, .-_dl_tlsdesc_lazy_resolver
-
-/* Holder for lazy tls descriptors being resolve in another thread.
-
-   Our calling convention is to clobber r0, r1 and the processor
-   flags.  All others that are modified must be saved */
-	.hidden _dl_tlsdesc_resolve_hold
-	.global	_dl_tlsdesc_resolve_hold
-	.type	_dl_tlsdesc_resolve_hold,#function
-	cfi_startproc
-	eabi_fnstart
-	.align 2
-_dl_tlsdesc_resolve_hold:
-	/* r0 is saved so its original value can be used after the call and
-	   r1 is saved only to keep the stack aligned.  (r0 points to the tls
-	   descriptor, it is passed to _dl_tlsdesc_resolve_hold_fixup which
-	   is a void function that may clobber r0, later r0 is used to load
-	   the new resolver.)  */
-	eabi_save ({r0,r1,r2,r3,ip,lr})
-	push	{r0, r1, r2, r3, ip, lr}
-	cfi_adjust_cfa_offset (24)
-	cfi_rel_offset (r0, 0)
-	cfi_rel_offset (r1, 4)
-	cfi_rel_offset (r2, 8)
-	cfi_rel_offset (r3, 12)
-	cfi_rel_offset (ip, 16)
-	cfi_rel_offset (lr, 20)
-	adr	r1, _dl_tlsdesc_resolve_hold
-	bl	_dl_tlsdesc_resolve_hold_fixup
-	pop	{r0, r1, r2, r3, ip, lr}
-	cfi_adjust_cfa_offset (-24)
-	cfi_restore (lr)
-	cfi_restore (ip)
-	cfi_restore (r3)
-	cfi_restore (r2)
-	cfi_restore (r1)
-	cfi_restore (r0)
-	ldr     r1, [r0, #4]
-	BX      (r1)
-	eabi_fnend
-	cfi_endproc
-	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/arm/dl-tlsdesc.h b/sysdeps/arm/dl-tlsdesc.h
index 2770af0..bb2c731 100644
--- a/sysdeps/arm/dl-tlsdesc.h
+++ b/sysdeps/arm/dl-tlsdesc.h
@@ -48,9 +48,7 @@ struct tlsdesc_dynamic_arg
 
 extern ptrdiff_t attribute_hidden
   _dl_tlsdesc_return(struct tlsdesc *),
-  _dl_tlsdesc_undefweak(struct tlsdesc *),
-  _dl_tlsdesc_resolve_hold(struct tlsdesc *),
-  _dl_tlsdesc_lazy_resolver(struct tlsdesc *);
+  _dl_tlsdesc_undefweak(struct tlsdesc *);
 
 # ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset);
diff --git a/sysdeps/arm/tlsdesc.c b/sysdeps/arm/tlsdesc.c
index ad79506..329f26e 100644
--- a/sysdeps/arm/tlsdesc.c
+++ b/sysdeps/arm/tlsdesc.c
@@ -16,130 +16,13 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <link.h>
 #include <ldsodefs.h>
-#include <elf/dynamic-link.h>
 #include <tls.h>
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
 #include <tlsdeschtab.h>
 
-/* This function is used to lazily resolve TLS_DESC REL relocations
-   Besides the TLS descriptor itself, we get the module's got address
-   as the second parameter. */
-
-void
-attribute_hidden
-_dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
-				 Elf32_Addr *got)
-{
-  struct link_map *l = (struct link_map *)got[1];
-  lookup_t result;
-  unsigned long value;
-
-  if (_dl_tlsdesc_resolve_early_return_p
-      (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
-    return;
-
-  if (td->argument.value & 0x80000000)
-    {
-      /* A global symbol, this is the symbol index.  */
-      /* The code below was borrowed from _dl_fixup().  */
-      const Elf_Symndx symndx = td->argument.value ^ 0x80000000;
-      const ElfW(Sym) *const symtab
-	= (const void *) D_PTR (l, l_info[DT_SYMTAB]);
-      const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
-      const ElfW(Sym) *sym = &symtab[symndx];
-
-      /* Look up the target symbol.  If the normal lookup rules are not
-	 used don't look in the global scope.  */
-      if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
-	  && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
-	{
-	  const struct r_found_version *version = NULL;
-
-	  if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
-	    {
-	      const ElfW(Half) *vernum =
-		(const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
-	      ElfW(Half) ndx = vernum[symndx] & 0x7fff;
-	      version = &l->l_versions[ndx];
-	      if (version->hash == 0)
-		version = NULL;
-	    }
-
-	  result = _dl_lookup_symbol_x
-	    (strtab + sym->st_name, l, &sym,
-	     l->l_scope, version, ELF_RTYPE_CLASS_PLT,
-	     DL_LOOKUP_ADD_DEPENDENCY, NULL);
-	  if (sym)
-	    value = sym->st_value;
-	  else
-	    {
-	      td->entry = _dl_tlsdesc_undefweak;
-	      goto done;
-	    }
-	}
-      else
-	{
-	  /* We already found the symbol.  The module (and therefore its load
-	     address) is also known.  */
-	  result = l;
-	  value = sym->st_value;
-	}
-    }
-  else
-    {
-      /* A local symbol, this is the offset within our tls section.
-	 */
-      value = td->argument.value;
-      result = l;
-    }
-
-#ifndef SHARED
-  CHECK_STATIC_TLS (l, result);
-#else
-  if (!TRY_STATIC_TLS (l, result))
-    {
-      td->argument.pointer = _dl_make_tlsdesc_dynamic (result, value);
-      td->entry = _dl_tlsdesc_dynamic;
-    }
-  else
-#endif
-    {
-      td->argument.value = value + result->l_tls_offset;
-      td->entry = _dl_tlsdesc_return;
-    }
-
- done:
-  _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
-   complete the lazy relocation.  Once another thread wins the race to
-   relocate a TLS descriptor, it sets the descriptor up such that this
-   function is called to wait until the resolver releases the
-   lock.  */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
-				void *caller)
-{
-  /* Maybe we're lucky and can return early.  */
-  if (caller != td->entry)
-    return;
-
-  /* Locking here will stop execution until the running resolver runs
-     _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
-     FIXME: We'd be better off waiting on a condition variable, such
-     that we didn't have to hold the lock throughout the relocation
-     processing.  */
-  __rtld_lock_lock_recursive (GL(dl_load_lock));
-  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
 /* Unmap the dynamic object, but also release its TLS descriptor table
    if there is one.  */
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=28e1ddf340e205cd8e1eff58e92957318c6e9966

commit 28e1ddf340e205cd8e1eff58e92957318c6e9966
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Oct 20 17:44:18 2017 +0100

    arm: Remove unnecessary volatile qualifier
    
    There is no reason to treat tlsdesc entries as volatile objects.
    
    	* sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.

diff --git a/ChangeLog b/ChangeLog
index e1dc25c..9598180 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	[BZ #18572]
 	* sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
 	non-lazily for R_ARM_TLS_DESC.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index c59386f..ec3a027 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -464,8 +464,7 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
 	  }
 	case R_ARM_TLS_DESC:
 	  {
-	    struct tlsdesc volatile *td =
-	      (struct tlsdesc volatile *)reloc_addr;
+	    struct tlsdesc *td = (struct tlsdesc *)reloc_addr;
 
 # ifndef RTLD_BOOTSTRAP
 	    if (! sym)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0ca3d1d6d096e222346c74601d50e9013c8bb25d

commit 0ca3d1d6d096e222346c74601d50e9013c8bb25d
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Oct 20 17:35:12 2017 +0100

    [BZ #18572] arm: Disable lazy initialization of tlsdesc entries
    
    Follow up to
    https://sourceware.org/ml/libc-alpha/2015-11/msg00272.html
    
    Always do tls descriptor initialization at load time during relocation
    processing (as if DF_BIND_NOW were set for the binary) to avoid barriers
    at every tls access.  This patch mimics bind-now semantics in the lazy
    relocation code of the arm target (elf_machine_lazy_rel).
    
    Ideally the static linker should be updated too to not emit tlsdesc
    relocs in DT_REL*, so elf_machine_lazy_rel is not called on them at all.
    
    	[BZ #18572]
    	* sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
    	non-lazily for R_ARM_TLS_DESC.

diff --git a/ChangeLog b/ChangeLog
index 778e41f..e1dc25c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	[BZ #18572]
+	* sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
+	non-lazily for R_ARM_TLS_DESC.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	[BZ #17078]
 	* sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
 	R_ARM_TLS_DESC case.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index bf5f5d2..c59386f 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -669,15 +669,21 @@ elf_machine_lazy_rel (struct link_map *map,
     }
   else if (__builtin_expect (r_type == R_ARM_TLS_DESC, 1))
     {
-      struct tlsdesc volatile *td =
-	(struct tlsdesc volatile *)reloc_addr;
-
-      /* The linker must have given us the parameter we need in the
-	 first GOT entry, and left the second one empty.  We fill the
-	 latter with the resolver address.  */
-      assert (td->entry == 0);
-      td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
-			  + map->l_addr);
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
+
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rel (map, reloc, sym, version, reloc_addr, skip_ifunc);
     }
   else
     _dl_reloc_bad_type (map, r_type, 1);

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52

commit 2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Fri Oct 20 17:10:50 2017 +0100

    [BZ #17078] arm: remove prelinker support for R_ARM_TLS_DESC
    
    This patch reverts
    
    commit 9c82da17b5794efebe005de2fd22d61a3ea4b58a
    Author: Maciej W. Rozycki <macro@codesourcery.com>
    Date:   2014-07-17 19:22:05 +0100
    
        [BZ #17078] ARM: R_ARM_TLS_DESC prelinker support
    
    This only implemented support for the lazy binding case (and thus
    closed the bugzilla ticket prematurely); however, tlsdesc on arm is
    not correct with lazy binding because there is a data race between
    the lazy initialization code and the tlsdesc resolver functions.
    
    Lazy initialization of tlsdesc entries will be removed from arm to
    fix the data races and thus this half-finished prelinker support
    is no longer useful.
    
    	[BZ #17078]
    	* sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
    	R_ARM_TLS_DESC case.
    	(elf_machine_lazy_rel): Remove the prelink check.

diff --git a/ChangeLog b/ChangeLog
index 8499ada..778e41f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	[BZ #17078]
+	* sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
+	R_ARM_TLS_DESC case.
+	(elf_machine_lazy_rel): Remove the prelink check.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
 	DT_TLSDESC_GOT initialization.
 	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index 7e2d73e..bf5f5d2 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -594,32 +594,6 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 	case R_ARM_ABS32:
 	  *reloc_addr = value + reloc->r_addend;
 	  break;
-#  ifdef RESOLVE_CONFLICT_FIND_MAP
-	case R_ARM_TLS_DESC:
-	  {
-	    struct tlsdesc volatile *td __attribute__ ((unused)) =
-	      (struct tlsdesc volatile *) reloc_addr;
-
-	    RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr);
-
-	    /* Make sure we know what's going on.  */
-	    assert (td->entry
-		    == (void *) (D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
-				 + map->l_addr));
-	    assert (map->l_info[ADDRIDX (DT_TLSDESC_GOT)]);
-
-	    /* Set up the lazy resolver and store the pointer to our link
-	       map in _GLOBAL_OFFSET_TABLE[1] now as for a prelinked
-	       binary elf_machine_runtime_setup() is not called and hence
-	       neither has been initialized.  */
-	    *(Elf32_Addr *) (D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_GOT)])
-			     + map->l_addr)
-	      = (Elf32_Addr) &_dl_tlsdesc_lazy_resolver;
-	    ((Elf32_Addr *) D_PTR (map, l_info[DT_PLTGOT]))[1]
-	      = (Elf32_Addr) map;
-	  }
-	  break;
-#  endif /* RESOLVE_CONFLICT_FIND_MAP */
 	case R_ARM_PC24:
           relocate_pc24 (map, value, reloc_addr, reloc->r_addend);
 	  break;
@@ -699,11 +673,9 @@ elf_machine_lazy_rel (struct link_map *map,
 	(struct tlsdesc volatile *)reloc_addr;
 
       /* The linker must have given us the parameter we need in the
-	 first GOT entry, and left the second one empty.  The latter
-	 will have been preset by the prelinker if used though.
-	 We fill it with the resolver address.  */
-      assert (td->entry == 0
-	      || map->l_info[VALIDX (DT_GNU_PRELINKED)] != NULL);
+	 first GOT entry, and left the second one empty.  We fill the
+	 latter with the resolver address.  */
+      assert (td->entry == 0);
       td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
 			  + map->l_addr);
     }

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=91c5a366d8d398d2fc4542f961c93058a92ade6f

commit 91c5a366d8d398d2fc4542f961c93058a92ade6f
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Wed Sep 27 18:14:21 2017 +0100

    aarch64: Remove barriers from TLS descriptor functions
    
    Remove the ldar synchronization and most of the code related to
    lazy TLSDESC initialization.
    
    	* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
    	DT_TLSDESC_GOT initialization.
    	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
    	(_dl_tlsdesc_resolve_rela): Likewise.
    	(_dl_tlsdesc_resolve_hold): Likewise.
    	(_dl_tlsdesc_undefweak): Remove ldar.
    	(_dl_tlsdesc_dynamic): Likewise.
    	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
    	(_dl_tlsdesc_resolve_rela): Likewise.
    	(_dl_tlsdesc_resolve_hold): Likewise.
    	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
    	(_dl_tlsdesc_resolve_hold_fixup): Likewise.
    	(_dl_tlsdesc_resolve_rela): Likewise.
    	(_dl_tlsdesc_resolve_hold): Likewise.

diff --git a/ChangeLog b/ChangeLog
index 0126d77..8499ada 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
+	DT_TLSDESC_GOT initialization.
+	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
+	(_dl_tlsdesc_resolve_rela): Likewise.
+	(_dl_tlsdesc_resolve_hold): Likewise.
+	(_dl_tlsdesc_undefweak): Remove ldar.
+	(_dl_tlsdesc_dynamic): Likewise.
+	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
+	(_dl_tlsdesc_resolve_rela): Likewise.
+	(_dl_tlsdesc_resolve_hold): Likewise.
+	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
+	(_dl_tlsdesc_resolve_hold_fixup): Likewise.
+	(_dl_tlsdesc_resolve_rela): Likewise.
+	(_dl_tlsdesc_resolve_hold): Likewise.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
 	binding and initialization non-lazily for R_AARCH64_TLSDESC.
 
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 837e281..acbd3e3 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -102,10 +102,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	}
     }
 
-  if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
-    *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
-      = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
-
   return lazy;
 }
 
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 3b2da62..70550c7 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -80,30 +80,6 @@ _dl_tlsdesc_return:
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
-	/* Same as _dl_tlsdesc_return but with synchronization for
-	   lazy relocation.
-	   Prototype:
-	   _dl_tlsdesc_return_lazy (tlsdesc *) ;
-	 */
-	.hidden _dl_tlsdesc_return_lazy
-	.global	_dl_tlsdesc_return_lazy
-	.type	_dl_tlsdesc_return_lazy,%function
-	cfi_startproc
-	.align 2
-_dl_tlsdesc_return_lazy:
-	/* The ldar here happens after the load from [x0] at the call site
-	   (that is generated by the compiler as part of the TLS access ABI),
-	   so it reads the same value (this function is the final value of
-	   td->entry) and thus it synchronizes with the release store to
-	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
-	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
-	DELOUSE (0)
-	ldar	PTR_REG (zr), [x0]
-	ldr	PTR_REG (0), [x0, #PTR_SIZE]
-	RET
-	cfi_endproc
-	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
-
 	/* Handler for undefined weak TLS symbols.
 	   Prototype:
 	   _dl_tlsdesc_undefweak (tlsdesc *);
@@ -121,14 +97,7 @@ _dl_tlsdesc_return_lazy:
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
-	/* The ldar here happens after the load from [x0] at the call site
-	   (that is generated by the compiler as part of the TLS access ABI),
-	   so it reads the same value (this function is the final value of
-	   td->entry) and thus it synchronizes with the release store to
-	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
-	   from [x0,#8] here happens after the initialization of td->arg.  */
 	DELOUSE (0)
-	ldar	PTR_REG (zr), [x0]
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	mrs	x1, tpidr_el0
 	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -192,13 +161,6 @@ _dl_tlsdesc_dynamic:
 	cfi_rel_offset (x4, 32+24)
 
 	mrs	x4, tpidr_el0
-	/* The ldar here happens after the load from [x0] at the call site
-	   (that is generated by the compiler as part of the TLS access ABI),
-	   so it reads the same value (this function is the final value of
-	   td->entry) and thus it synchronizes with the release store to
-	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
-	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
-	ldar	PTR_REG (zr), [x0]
 	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]
 	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]
 	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
@@ -276,168 +238,3 @@ _dl_tlsdesc_dynamic:
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
 # undef NSAVEXREGPAIRS
 #endif
-
-	/* This function is a wrapper for a lazy resolver for TLS_DESC
-	   RELA relocations.
-	   When the actual resolver returns, it will have adjusted the
-	   TLS descriptor such that we can tail-call it for it to return
-	   the TP offset of the symbol.  */
-
-	.hidden _dl_tlsdesc_resolve_rela
-	.global	_dl_tlsdesc_resolve_rela
-	.type	_dl_tlsdesc_resolve_rela,%function
-	cfi_startproc
-	.align 2
-_dl_tlsdesc_resolve_rela:
-#define	NSAVEXREGPAIRS 9
-	/* The tlsdesc PLT entry pushes x2 and x3 to the stack.  */
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (x2, 0)
-	cfi_rel_offset (x3, 8)
-	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
-	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
-	stp	 x1,  x4, [sp, #32+16*0]
-	stp	 x5,  x6, [sp, #32+16*1]
-	stp	 x7,  x8, [sp, #32+16*2]
-	stp	 x9, x10, [sp, #32+16*3]
-	stp	x11, x12, [sp, #32+16*4]
-	stp	x13, x14, [sp, #32+16*5]
-	stp	x15, x16, [sp, #32+16*6]
-	stp	x17, x18, [sp, #32+16*7]
-	str	x0,       [sp, #32+16*8]
-	cfi_rel_offset (x1, 32)
-	cfi_rel_offset (x4, 32+8)
-	cfi_rel_offset (x5, 32+16)
-	cfi_rel_offset (x6, 32+16+8)
-	cfi_rel_offset (x7, 32+16*2)
-	cfi_rel_offset (x8, 32+16*2+8)
-	cfi_rel_offset (x9, 32+16*3)
-	cfi_rel_offset (x10, 32+16*3+8)
-	cfi_rel_offset (x11, 32+16*4)
-	cfi_rel_offset (x12, 32+16*4+8)
-	cfi_rel_offset (x13, 32+16*5)
-	cfi_rel_offset (x14, 32+16*5+8)
-	cfi_rel_offset (x15, 32+16*6)
-	cfi_rel_offset (x16, 32+16*6+8)
-	cfi_rel_offset (x17, 32+16*7)
-	cfi_rel_offset (x18, 32+16*7+8)
-	cfi_rel_offset (x0, 32+16*8)
-
-	SAVE_Q_REGISTERS
-
-	DELOUSE (3)
-	ldr	PTR_REG (1), [x3, #PTR_SIZE]
-	bl	_dl_tlsdesc_resolve_rela_fixup
-
-	RESTORE_Q_REGISTERS
-
-	ldr	x0, [sp, #32+16*8]
-	DELOUSE (0)
-	ldr	PTR_REG (1), [x0]
-	blr	x1
-
-	ldp	 x1,  x4, [sp, #32+16*0]
-	ldp	 x5,  x6, [sp, #32+16*1]
-	ldp	 x7,  x8, [sp, #32+16*2]
-	ldp	 x9, x10, [sp, #32+16*3]
-	ldp	x11, x12, [sp, #32+16*4]
-	ldp	x13, x14, [sp, #32+16*5]
-	ldp	x15, x16, [sp, #32+16*6]
-	ldp	x17, x18, [sp, #32+16*7]
-	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
-	cfi_restore (x29)
-	cfi_restore (x30)
-	ldp	x2, x3, [sp], #16
-	cfi_adjust_cfa_offset (-16)
-	RET
-#undef NSAVEXREGPAIRS
-	cfi_endproc
-	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
-
-	/* This function is a placeholder for lazy resolving of TLS
-	relocations.  Once some thread starts resolving a TLS
-	relocation, it sets up the TLS descriptor to use this
-	resolver, such that other threads that would attempt to
-	resolve it concurrently may skip the call to the original lazy
-	resolver and go straight to a condition wait.
-
-	When the actual resolver returns, it will have adjusted the
-	TLS descriptor such that we can tail-call it for it to return
-	the TP offset of the symbol.  */
-
-	.hidden _dl_tlsdesc_resolve_hold
-	.global	_dl_tlsdesc_resolve_hold
-	.type	_dl_tlsdesc_resolve_hold,%function
-	cfi_startproc
-	.align 2
-_dl_tlsdesc_resolve_hold:
-#define	NSAVEXREGPAIRS 10
-1:
-	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
-	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
-	stp	 x1,  x2, [sp, #32+16*0]
-	stp	 x3,  x4, [sp, #32+16*1]
-	stp	 x5,  x6, [sp, #32+16*2]
-	stp	 x7,  x8, [sp, #32+16*3]
-	stp	 x9, x10, [sp, #32+16*4]
-	stp	x11, x12, [sp, #32+16*5]
-	stp	x13, x14, [sp, #32+16*6]
-	stp	x15, x16, [sp, #32+16*7]
-	stp	x17, x18, [sp, #32+16*8]
-	str	x0,       [sp, #32+16*9]
-	cfi_rel_offset (x1, 32)
-	cfi_rel_offset (x2, 32+8)
-	cfi_rel_offset (x3, 32+16)
-	cfi_rel_offset (x4, 32+16+8)
-	cfi_rel_offset (x5, 32+16*2)
-	cfi_rel_offset (x6, 32+16*2+8)
-	cfi_rel_offset (x7, 32+16*3)
-	cfi_rel_offset (x8, 32+16*3+8)
-	cfi_rel_offset (x9, 32+16*4)
-	cfi_rel_offset (x10, 32+16*4+8)
-	cfi_rel_offset (x11, 32+16*5)
-	cfi_rel_offset (x12, 32+16*5+8)
-	cfi_rel_offset (x13, 32+16*6)
-	cfi_rel_offset (x14, 32+16*6+8)
-	cfi_rel_offset (x15, 32+16*7)
-	cfi_rel_offset (x16, 32+16*7+8)
-	cfi_rel_offset (x17, 32+16*8)
-	cfi_rel_offset (x18, 32+16*8+8)
-	cfi_rel_offset (x0, 32+16*9)
-
-	SAVE_Q_REGISTERS
-
-	adr	x1, 1b
-	bl	_dl_tlsdesc_resolve_hold_fixup
-
-	RESTORE_Q_REGISTERS
-
-	ldr	x0, [sp, #32+16*9]
-	DELOUSE (0)
-	ldr	PTR_REG (1), [x0]
-	blr	x1
-
-	ldp	 x1,  x2, [sp, #32+16*0]
-	ldp	 x3,  x4, [sp, #32+16*1]
-	ldp	 x5,  x6, [sp, #32+16*2]
-	ldp	 x7,  x8, [sp, #32+16*3]
-	ldp	 x9, x10, [sp, #32+16*4]
-	ldp	x11, x12, [sp, #32+16*5]
-	ldp	x13, x14, [sp, #32+16*6]
-	ldp	x15, x16, [sp, #32+16*7]
-	ldp	x17, x18, [sp, #32+16*8]
-	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
-	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
-	cfi_restore (x29)
-	cfi_restore (x30)
-	RET
-	cfi_endproc
-	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
-#undef NSAVEXREGPAIRS
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 66ec0de..25b5b64 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,17 +46,8 @@ extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_return (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_return_lazy (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_undefweak (struct tlsdesc *);
 
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_rela (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_hold (struct tlsdesc *);
-
 # ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
 
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4c8a5a8..bd1356f 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -18,137 +18,12 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <link.h>
 #include <ldsodefs.h>
-#include <elf/dynamic-link.h>
 #include <tls.h>
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
 #include <tlsdeschtab.h>
-#include <atomic.h>
-
-/* The following functions take an entry_check_offset argument.  It's
-   computed by the caller as an offset between its entry point and the
-   call site, such that by adding the built-in return address that is
-   implicitly passed to the function with this offset, we can easily
-   obtain the caller's entry point to compare with the entry point
-   given in the TLS descriptor.  If it's changed, we want to return
-   immediately.  */
-
-/* This function is used to lazily resolve TLS_DESC RELA relocations.
-   The argument location is used to hold a pointer to the relocation.  */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
-{
-  const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
-
-  /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
-     initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
-     callers will return and retry calling td->entry.  The updated td->entry
-     synchronizes with the single writer so all read accesses here can use
-     relaxed order.  */
-  if (_dl_tlsdesc_resolve_early_return_p
-      (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
-    return;
-
-  /* The code below was borrowed from _dl_fixup(),
-     except for checking for STB_LOCAL.  */
-  const ElfW(Sym) *const symtab
-    = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
-  const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
-  const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
-  lookup_t result;
-
-   /* Look up the target symbol.  If the normal lookup rules are not
-      used don't look in the global scope.  */
-  if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
-      && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
-    {
-      const struct r_found_version *version = NULL;
-
-      if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
-	{
-	  const ElfW(Half) *vernum =
-	    (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
-	  ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
-	  version = &l->l_versions[ndx];
-	  if (version->hash == 0)
-	    version = NULL;
-	}
-
-      result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
-				    l->l_scope, version, ELF_RTYPE_CLASS_PLT,
-				    DL_LOOKUP_ADD_DEPENDENCY, NULL);
-    }
-  else
-    {
-      /* We already found the symbol.  The module (and therefore its load
-	 address) is also known.  */
-      result = l;
-    }
-
-  if (!sym)
-    {
-      atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
-      /* This release store synchronizes with the ldar acquire load
-	 instruction in _dl_tlsdesc_undefweak.  */
-      atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
-    }
-  else
-    {
-#  ifndef SHARED
-      CHECK_STATIC_TLS (l, result);
-#  else
-      if (!TRY_STATIC_TLS (l, result))
-	{
-	  void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
-					      + reloc->r_addend);
-	  atomic_store_relaxed (&td->arg, p);
-	  /* This release store synchronizes with the ldar acquire load
-	     instruction in _dl_tlsdesc_dynamic.  */
-	  atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
-	}
-      else
-#  endif
-	{
-	  void *p = (void*) (sym->st_value + result->l_tls_offset
-			     + reloc->r_addend);
-	  atomic_store_relaxed (&td->arg, p);
-	  /* This release store synchronizes with the ldar acquire load
-	     instruction in _dl_tlsdesc_return_lazy.  */
-	  atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
-	}
-    }
-
-  _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
-   complete the lazy relocation.  Once another thread wins the race to
-   relocate a TLS descriptor, it sets the descriptor up such that this
-   function is called to wait until the resolver releases the
-   lock.  */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
-{
-  /* Maybe we're lucky and can return early.  */
-  if (caller != atomic_load_relaxed (&td->entry))
-    return;
-
-  /* Locking here will stop execution until the running resolver runs
-     _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
-     FIXME: We'd be better off waiting on a condition variable, such
-     that we didn't have to hold the lock throughout the relocation
-     processing.  */
-  __rtld_lock_lock_recursive (GL(dl_load_lock));
-  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
 
 /* Unmap the dynamic object, but also release its TLS descriptor table
    if there is one.  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b7cf203b5c17dd6d9878537d41e0c7cc3d270a67

commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Wed Sep 27 16:55:14 2017 +0100

    aarch64: Disable lazy symbol binding of TLSDESC
    
    Always do TLS descriptor initialization at load time during relocation
    processing to avoid barriers at every TLS access. In non-dlopened shared
    libraries the overhead of tls access vs static global access is > 3x
    bigger when lazy initialization is used (_dl_tlsdesc_return_lazy)
    compared to bind-now (_dl_tlsdesc_return) so the barriers dominate tls
    access performance.
    
    TLSDESC relocs are in DT_JMPREL which are processed at load time using
    elf_machine_lazy_rel which is only supposed to do lightweight
    initialization using the DT_TLSDESC_PLT trampoline (the trampoline code
    jumps to the entry point in DT_TLSDESC_GOT which does the lazy tlsdesc
    initialization at runtime).  This patch changes elf_machine_lazy_rel
    in aarch64 to do the symbol binding and initialization as if DF_BIND_NOW
    was set, so the non-lazy code path of elf/do-rel.h was replicated.
    
    The static linker could be changed to emit TLSDESC relocs in DT_REL*,
    which are processed non-lazily, but the goal of this patch is to always
    guarantee bind-now semantics, even if the binary was produced with an
    old linker, so the barriers can be dropped in tls descriptor functions.
    
    After this change the synchronizing ldar instructions can be dropped
    as well as the lazy initialization machinery including the DT_TLSDESC_GOT
    setup.
    
    I believe this should be done on all targets, including ones where no
    barrier is needed for lazy initialization.  There is very little gain in
    optimizing for a large number of symbolic tlsdesc relocations, which is an
    extremely uncommon case.  And currently the tlsdesc entries are only
    readonly protected with -z now, and some hardenings against writable
    JUMPSLOT relocs don't work for TLSDESC, so they are a security hazard.
    (But to fix that the static linker has to be changed.)
    
    	* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
    	binding and initialization non-lazily for R_AARCH64_TLSDESC.

diff --git a/ChangeLog b/ChangeLog
index 44e18dd..0126d77 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
+	* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
+	binding and initialization non-lazily for R_AARCH64_TLSDESC.
+
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
 	* elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
 	(_dl_tlsdesc_wake_up_held_fixups): Likewise.
 
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index e765612..837e281 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -399,12 +399,21 @@ elf_machine_lazy_rel (struct link_map *map,
     }
   else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
     {
-      struct tlsdesc volatile *td =
-	(struct tlsdesc volatile *)reloc_addr;
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
 
-      td->arg = (void*)reloc;
-      td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
-			  + map->l_addr);
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
     }
   else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE)))
     {

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0cc5b022f817eeaa81735ae58717b5dabae92941

commit 0cc5b022f817eeaa81735ae58717b5dabae92941
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Mon Oct 23 12:15:40 2017 +0100

    Mark lazy tlsdesc helper functions unused to avoid warnings
    
    These static functions are not needed if a target does not do lazy
    tlsdesc initialization.
    
    	* elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
    	(_dl_tlsdesc_wake_up_held_fixups): Likewise.

diff --git a/ChangeLog b/ChangeLog
index 626828e..44e18dd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-03  Szabolcs Nagy  <szabolcs.nagy@arm.com>
+
+	* elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
+	(_dl_tlsdesc_wake_up_held_fixups): Likewise.
+
 2017-11-02  Joseph Myers  <joseph@codesourcery.com>
 
 	* wcsmbs/wchar.h [__HAVE_FLOAT16 && __USE_GNU] (wcstof16):
diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h
index ad3001d..8796318 100644
--- a/elf/tlsdeschtab.h
+++ b/elf/tlsdeschtab.h
@@ -137,6 +137,7 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
    avoid introducing such dependencies.  */
 
 static int
+__attribute__ ((unused))
 _dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
 {
   if (caller != atomic_load_relaxed (&td->entry))
@@ -155,6 +156,7 @@ _dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
 }
 
 static void
+__attribute__ ((unused))
 _dl_tlsdesc_wake_up_held_fixups (void)
 {
   __rtld_lock_unlock_recursive (GL(dl_load_lock));

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                    |   59 ++++++++
 elf/tlsdeschtab.h            |    2 +
 sysdeps/aarch64/dl-machine.h |   23 ++--
 sysdeps/aarch64/dl-tlsdesc.S |  308 +++++++-----------------------------------
 sysdeps/aarch64/dl-tlsdesc.h |    9 --
 sysdeps/aarch64/tlsdesc.c    |  127 +-----------------
 sysdeps/arm/dl-machine.h     |   59 +++------
 sysdeps/arm/dl-tlsdesc.S     |   84 ------------
 sysdeps/arm/dl-tlsdesc.h     |    4 +-
 sysdeps/arm/tlsdesc.c        |  119 +----------------
 10 files changed, 145 insertions(+), 649 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]