This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.26.9000-716-g659ca26
- From: nsz at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 3 Nov 2017 14:54:02 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.26.9000-716-g659ca26
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 659ca267360e1c1f64eea9205bb81cb5e9049908 (commit)
via 94d2f0af157d8c02e688a315039f8044aedbcc89 (commit)
via 28e1ddf340e205cd8e1eff58e92957318c6e9966 (commit)
via 0ca3d1d6d096e222346c74601d50e9013c8bb25d (commit)
via 2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52 (commit)
via 91c5a366d8d398d2fc4542f961c93058a92ade6f (commit)
via b7cf203b5c17dd6d9878537d41e0c7cc3d270a67 (commit)
via 0cc5b022f817eeaa81735ae58717b5dabae92941 (commit)
from 43ddff2e364c69847f5f698f6a43f9dde328b76a (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=659ca267360e1c1f64eea9205bb81cb5e9049908
commit 659ca267360e1c1f64eea9205bb81cb5e9049908
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Oct 24 17:49:14 2017 +0100
aarch64: optimize _dl_tlsdesc_dynamic fast path
Remove some load/store instructions from the dynamic tlsdesc resolver
fast path. This gives around 20% faster tls access in dlopened shared
libraries (assuming glibc ran out of static tls space).
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Optimize.
diff --git a/ChangeLog b/ChangeLog
index 2cc266b..e1146ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Optimize.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 70550c7..1d2008c 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -142,23 +142,17 @@ _dl_tlsdesc_undefweak:
cfi_startproc
.align 2
_dl_tlsdesc_dynamic:
-# define NSAVEXREGPAIRS 2
- stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
- cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
DELOUSE (0)
/* Save just enough registers to support fast path, if we fall
into slow path we will save additional registers. */
-
- stp x1, x2, [sp, #32+16*0]
- stp x3, x4, [sp, #32+16*1]
- cfi_rel_offset (x1, 32)
- cfi_rel_offset (x2, 32+8)
- cfi_rel_offset (x3, 32+16)
- cfi_rel_offset (x4, 32+24)
+ stp x1, x2, [sp, #-32]!
+ stp x3, x4, [sp, #16]
+ cfi_adjust_cfa_offset (32)
+ cfi_rel_offset (x1, 0)
+ cfi_rel_offset (x2, 8)
+ cfi_rel_offset (x3, 16)
+ cfi_rel_offset (x4, 24)
mrs x4, tpidr_el0
ldr PTR_REG (1), [x0,#TLSDESC_ARG]
@@ -167,23 +161,18 @@ _dl_tlsdesc_dynamic:
ldr PTR_REG (2), [x0,#DTV_COUNTER]
cmp PTR_REG (3), PTR_REG (2)
b.hi 2f
- ldr PTR_REG (2), [x1,#TLSDESC_MODID]
+ /* Load r2 = td->tlsinfo.ti_module and r3 = td->tlsinfo.ti_offset. */
+ ldp PTR_REG (2), PTR_REG (3), [x1,#TLSDESC_MODID]
add PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
ldr PTR_REG (0), [x0] /* Load val member of DTV entry. */
cmp PTR_REG (0), #TLS_DTV_UNALLOCATED
b.eq 2f
- ldr PTR_REG (1), [x1,#TLSDESC_MODOFF]
- add PTR_REG (0), PTR_REG (0), PTR_REG (1)
- sub PTR_REG (0), PTR_REG (0), PTR_REG (4)
+ sub PTR_REG (3), PTR_REG (3), PTR_REG (4)
+ add PTR_REG (0), PTR_REG (0), PTR_REG (3)
1:
- ldp x1, x2, [sp, #32+16*0]
- ldp x3, x4, [sp, #32+16*1]
-
- ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
- cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
- cfi_restore (x29)
- cfi_restore (x30)
-# undef NSAVEXREGPAIRS
+ ldp x3, x4, [sp, #16]
+ ldp x1, x2, [sp], #32
+ cfi_adjust_cfa_offset (-32)
RET
2:
/* This is the slow path. We need to call __tls_get_addr() which
@@ -191,29 +180,33 @@ _dl_tlsdesc_dynamic:
callee will trash. */
/* Save the remaining registers that we must treat as caller save. */
-# define NSAVEXREGPAIRS 7
- stp x5, x6, [sp, #-16*NSAVEXREGPAIRS]!
+# define NSAVEXREGPAIRS 8
+ stp x29, x30, [sp,#-16*NSAVEXREGPAIRS]!
cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
- stp x7, x8, [sp, #16*1]
- stp x9, x10, [sp, #16*2]
- stp x11, x12, [sp, #16*3]
- stp x13, x14, [sp, #16*4]
- stp x15, x16, [sp, #16*5]
- stp x17, x18, [sp, #16*6]
- cfi_rel_offset (x5, 0)
- cfi_rel_offset (x6, 8)
- cfi_rel_offset (x7, 16)
- cfi_rel_offset (x8, 16+8)
- cfi_rel_offset (x9, 16*2)
- cfi_rel_offset (x10, 16*2+8)
- cfi_rel_offset (x11, 16*3)
- cfi_rel_offset (x12, 16*3+8)
- cfi_rel_offset (x13, 16*4)
- cfi_rel_offset (x14, 16*4+8)
- cfi_rel_offset (x15, 16*5)
- cfi_rel_offset (x16, 16*5+8)
- cfi_rel_offset (x17, 16*6)
- cfi_rel_offset (x18, 16*6+8)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+ mov x29, sp
+ stp x5, x6, [sp, #16*1]
+ stp x7, x8, [sp, #16*2]
+ stp x9, x10, [sp, #16*3]
+ stp x11, x12, [sp, #16*4]
+ stp x13, x14, [sp, #16*5]
+ stp x15, x16, [sp, #16*6]
+ stp x17, x18, [sp, #16*7]
+ cfi_rel_offset (x5, 16*1)
+ cfi_rel_offset (x6, 16*1+8)
+ cfi_rel_offset (x7, 16*2)
+ cfi_rel_offset (x8, 16*2+8)
+ cfi_rel_offset (x9, 16*3)
+ cfi_rel_offset (x10, 16*3+8)
+ cfi_rel_offset (x11, 16*4)
+ cfi_rel_offset (x12, 16*4+8)
+ cfi_rel_offset (x13, 16*5)
+ cfi_rel_offset (x14, 16*5+8)
+ cfi_rel_offset (x15, 16*6)
+ cfi_rel_offset (x16, 16*6+8)
+ cfi_rel_offset (x17, 16*7)
+ cfi_rel_offset (x18, 16*7+8)
SAVE_Q_REGISTERS
@@ -225,14 +218,18 @@ _dl_tlsdesc_dynamic:
RESTORE_Q_REGISTERS
- ldp x7, x8, [sp, #16*1]
- ldp x9, x10, [sp, #16*2]
- ldp x11, x12, [sp, #16*3]
- ldp x13, x14, [sp, #16*4]
- ldp x15, x16, [sp, #16*5]
- ldp x17, x18, [sp, #16*6]
- ldp x5, x6, [sp], #16*NSAVEXREGPAIRS
+ ldp x5, x6, [sp, #16*1]
+ ldp x7, x8, [sp, #16*2]
+ ldp x9, x10, [sp, #16*3]
+ ldp x11, x12, [sp, #16*4]
+ ldp x13, x14, [sp, #16*5]
+ ldp x15, x16, [sp, #16*6]
+ ldp x17, x18, [sp, #16*7]
+
+ ldp x29, x30, [sp], #16*NSAVEXREGPAIRS
cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+ cfi_restore (x29)
+ cfi_restore (x30)
b 1b
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=94d2f0af157d8c02e688a315039f8044aedbcc89
commit 94d2f0af157d8c02e688a315039f8044aedbcc89
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Fri Oct 20 17:53:44 2017 +0100
arm: Remove lazy tlsdesc initialization related code
Lazy tlsdesc initialization is no longer used in the dynamic linker
so all related code can be removed.
* sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_lazy_resolver): Remove.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_lazy_resolver_fixup): Remove.
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
diff --git a/ChangeLog b/ChangeLog
index 9598180..2cc266b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/arm/dl-machine.h (elf_machine_runtime_setup): Remove
+ DT_TLSDESC_GOT initialization.
+ * sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_lazy_resolver): Remove.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ * sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_lazy_resolver): Remove.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ * sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_lazy_resolver_fixup): Remove.
+ (_dl_tlsdesc_resolve_hold_fixup): Likewise.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index ec3a027..fb5468a 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -127,10 +127,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
got[2] = (Elf32_Addr) &_dl_runtime_resolve;
}
- if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
- *(Elf32_Addr*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
- = (Elf32_Addr) &_dl_tlsdesc_lazy_resolver;
-
return lazy;
}
diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
index e7bed02..99d4b83 100644
--- a/sysdeps/arm/dl-tlsdesc.S
+++ b/sysdeps/arm/dl-tlsdesc.S
@@ -132,87 +132,3 @@ _dl_tlsdesc_dynamic:
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
#endif /* SHARED */
-
-/* lazy resolved for tls descriptors. */
- .hidden _dl_tlsdesc_lazy_resolver
- .global _dl_tlsdesc_lazy_resolver
- .type _dl_tlsdesc_lazy_resolver,#function
- cfi_startproc
- eabi_fnstart
- .align 2
-_dl_tlsdesc_lazy_resolver:
- /* r0 points at the tlsdesc,
- r1 points at the GOT
- r2 was pushed by the trampoline and used as a temp,
- we need to pop it here.
- We push the remaining call-clobbered registers here, and also
- R1 -- to keep the stack correctly aligned. */
- /* Tell the unwinder that r2 has already been pushed. */
- eabi_save ({r2})
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (r2, 0)
- eabi_save ({r0,r1,r3,ip,lr})
- push {r0, r1, r3, ip, lr}
- cfi_adjust_cfa_offset (20)
- cfi_rel_offset (r0, 0)
- cfi_rel_offset (r1, 4)
- cfi_rel_offset (r3, 8)
- cfi_rel_offset (ip, 12)
- cfi_rel_offset (lr, 16)
- bl _dl_tlsdesc_lazy_resolver_fixup
- pop {r0, r1, r3, ip, lr}
- cfi_adjust_cfa_offset (-20)
- cfi_restore (lr)
- cfi_restore (ip)
- cfi_restore (r3)
- cfi_restore (r1)
- cfi_restore (r0)
- pop {r2}
- cfi_adjust_cfa_offset (-4)
- cfi_restore (r2)
- ldr r1, [r0, #4]
- BX (r1)
- eabi_fnend
- cfi_endproc
- .size _dl_tlsdesc_lazy_resolver, .-_dl_tlsdesc_lazy_resolver
-
-/* Holder for lazy tls descriptors being resolve in another thread.
-
- Our calling convention is to clobber r0, r1 and the processor
- flags. All others that are modified must be saved */
- .hidden _dl_tlsdesc_resolve_hold
- .global _dl_tlsdesc_resolve_hold
- .type _dl_tlsdesc_resolve_hold,#function
- cfi_startproc
- eabi_fnstart
- .align 2
-_dl_tlsdesc_resolve_hold:
- /* r0 is saved so its original value can be used after the call and
- r1 is saved only to keep the stack aligned. (r0 points to the tls
- descriptor, it is passed to _dl_tlsdesc_resolve_hold_fixup which
- is a void function that may clobber r0, later r0 is used to load
- the new resolver.) */
- eabi_save ({r0,r1,r2,r3,ip,lr})
- push {r0, r1, r2, r3, ip, lr}
- cfi_adjust_cfa_offset (24)
- cfi_rel_offset (r0, 0)
- cfi_rel_offset (r1, 4)
- cfi_rel_offset (r2, 8)
- cfi_rel_offset (r3, 12)
- cfi_rel_offset (ip, 16)
- cfi_rel_offset (lr, 20)
- adr r1, _dl_tlsdesc_resolve_hold
- bl _dl_tlsdesc_resolve_hold_fixup
- pop {r0, r1, r2, r3, ip, lr}
- cfi_adjust_cfa_offset (-24)
- cfi_restore (lr)
- cfi_restore (ip)
- cfi_restore (r3)
- cfi_restore (r2)
- cfi_restore (r1)
- cfi_restore (r0)
- ldr r1, [r0, #4]
- BX (r1)
- eabi_fnend
- cfi_endproc
- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/arm/dl-tlsdesc.h b/sysdeps/arm/dl-tlsdesc.h
index 2770af0..bb2c731 100644
--- a/sysdeps/arm/dl-tlsdesc.h
+++ b/sysdeps/arm/dl-tlsdesc.h
@@ -48,9 +48,7 @@ struct tlsdesc_dynamic_arg
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return(struct tlsdesc *),
- _dl_tlsdesc_undefweak(struct tlsdesc *),
- _dl_tlsdesc_resolve_hold(struct tlsdesc *),
- _dl_tlsdesc_lazy_resolver(struct tlsdesc *);
+ _dl_tlsdesc_undefweak(struct tlsdesc *);
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset);
diff --git a/sysdeps/arm/tlsdesc.c b/sysdeps/arm/tlsdesc.c
index ad79506..329f26e 100644
--- a/sysdeps/arm/tlsdesc.c
+++ b/sysdeps/arm/tlsdesc.c
@@ -16,130 +16,13 @@
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
-#include <link.h>
#include <ldsodefs.h>
-#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
-/* This function is used to lazily resolve TLS_DESC REL relocations
- Besides the TLS descriptor itself, we get the module's got address
- as the second parameter. */
-
-void
-attribute_hidden
-_dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
- Elf32_Addr *got)
-{
- struct link_map *l = (struct link_map *)got[1];
- lookup_t result;
- unsigned long value;
-
- if (_dl_tlsdesc_resolve_early_return_p
- (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
- return;
-
- if (td->argument.value & 0x80000000)
- {
- /* A global symbol, this is the symbol index. */
- /* The code below was borrowed from _dl_fixup(). */
- const Elf_Symndx symndx = td->argument.value ^ 0x80000000;
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[symndx];
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[symndx] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x
- (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- if (sym)
- value = sym->st_value;
- else
- {
- td->entry = _dl_tlsdesc_undefweak;
- goto done;
- }
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- value = sym->st_value;
- }
- }
- else
- {
- /* A local symbol, this is the offset within our tls section.
- */
- value = td->argument.value;
- result = l;
- }
-
-#ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-#else
- if (!TRY_STATIC_TLS (l, result))
- {
- td->argument.pointer = _dl_make_tlsdesc_dynamic (result, value);
- td->entry = _dl_tlsdesc_dynamic;
- }
- else
-#endif
- {
- td->argument.value = value + result->l_tls_offset;
- td->entry = _dl_tlsdesc_return;
- }
-
- done:
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
- complete the lazy relocation. Once another thread wins the race to
- relocate a TLS descriptor, it sets the descriptor up such that this
- function is called to wait until the resolver releases the
- lock. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
- void *caller)
-{
- /* Maybe we're lucky and can return early. */
- if (caller != td->entry)
- return;
-
- /* Locking here will stop execution until the running resolver runs
- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
- FIXME: We'd be better off waiting on a condition variable, such
- that we didn't have to hold the lock throughout the relocation
- processing. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=28e1ddf340e205cd8e1eff58e92957318c6e9966
commit 28e1ddf340e205cd8e1eff58e92957318c6e9966
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Fri Oct 20 17:44:18 2017 +0100
arm: Remove unnecessary volatile qualifier
There is no reason to treat tlsdesc entries as volatile objects.
* sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.
diff --git a/ChangeLog b/ChangeLog
index e1dc25c..9598180 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/arm/dl-machine.h (elf_machine_rel): Remove volatile.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
[BZ #18572]
* sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
non-lazily for R_ARM_TLS_DESC.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index c59386f..ec3a027 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -464,8 +464,7 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
}
case R_ARM_TLS_DESC:
{
- struct tlsdesc volatile *td =
- (struct tlsdesc volatile *)reloc_addr;
+ struct tlsdesc *td = (struct tlsdesc *)reloc_addr;
# ifndef RTLD_BOOTSTRAP
if (! sym)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0ca3d1d6d096e222346c74601d50e9013c8bb25d
commit 0ca3d1d6d096e222346c74601d50e9013c8bb25d
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Fri Oct 20 17:35:12 2017 +0100
[BZ #18572] arm: Disable lazy initialization of tlsdesc entries
Follow up to
https://sourceware.org/ml/libc-alpha/2015-11/msg00272.html
Always do tls descriptor initialization at load time during relocation
processing (as if DF_BIND_NOW were set for the binary) to avoid barriers
at every tls access. This patch mimics bind-now semantics in the lazy
relocation code of the arm target (elf_machine_lazy_rel).
Ideally the static linker should be updated too to not emit tlsdesc
relocs in DT_REL*, so elf_machine_lazy_rel is not called on them at all.
[BZ #18572]
* sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
non-lazily for R_ARM_TLS_DESC.
diff --git a/ChangeLog b/ChangeLog
index 778e41f..e1dc25c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ [BZ #18572]
+ * sysdeps/arm/dl-machine.h (elf_machine_lazy_rel): Do symbol binding
+ non-lazily for R_ARM_TLS_DESC.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
[BZ #17078]
* sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
R_ARM_TLS_DESC case.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index bf5f5d2..c59386f 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -669,15 +669,21 @@ elf_machine_lazy_rel (struct link_map *map,
}
else if (__builtin_expect (r_type == R_ARM_TLS_DESC, 1))
{
- struct tlsdesc volatile *td =
- (struct tlsdesc volatile *)reloc_addr;
-
- /* The linker must have given us the parameter we need in the
- first GOT entry, and left the second one empty. We fill the
- latter with the resolver address. */
- assert (td->entry == 0);
- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
- + map->l_addr);
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
+
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum =
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
+
+ /* Always initialize TLS descriptors completely, because lazy
+ initialization requires synchronization at every TLS access. */
+ elf_machine_rel (map, reloc, sym, version, reloc_addr, skip_ifunc);
}
else
_dl_reloc_bad_type (map, r_type, 1);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52
commit 2c1d4e5fe4e722e0b747d6bddd7ce3a6b1766c52
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Fri Oct 20 17:10:50 2017 +0100
[BZ #17078] arm: remove prelinker support for R_ARM_TLS_DESC
This patch reverts
commit 9c82da17b5794efebe005de2fd22d61a3ea4b58a
Author: Maciej W. Rozycki <macro@codesourcery.com>
Date: 2014-07-17 19:22:05 +0100
[BZ #17078] ARM: R_ARM_TLS_DESC prelinker support
This only implemented support for the lazy binding case (and thus
closed the bugzilla ticket prematurely), however tlsdesc on arm is
not correct with lazy binding because there is a data race between
the lazy initialization code and tlsdesc resolver functions.
Lazy initialization of tlsdesc entries will be removed from arm to
fix the data races and thus this half-finished prelinker support
is no longer useful.
[BZ #17078]
* sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
R_ARM_TLS_DESC case.
(elf_machine_lazy_rel): Remove the prelink check.
diff --git a/ChangeLog b/ChangeLog
index 8499ada..778e41f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ [BZ #17078]
+ * sysdeps/arm/dl-machine.h (elf_machine_rela): Remove the
+ R_ARM_TLS_DESC case.
+ (elf_machine_lazy_rel): Remove the prelink check.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index 7e2d73e..bf5f5d2 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -594,32 +594,6 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
case R_ARM_ABS32:
*reloc_addr = value + reloc->r_addend;
break;
-# ifdef RESOLVE_CONFLICT_FIND_MAP
- case R_ARM_TLS_DESC:
- {
- struct tlsdesc volatile *td __attribute__ ((unused)) =
- (struct tlsdesc volatile *) reloc_addr;
-
- RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr);
-
- /* Make sure we know what's going on. */
- assert (td->entry
- == (void *) (D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
- + map->l_addr));
- assert (map->l_info[ADDRIDX (DT_TLSDESC_GOT)]);
-
- /* Set up the lazy resolver and store the pointer to our link
- map in _GLOBAL_OFFSET_TABLE[1] now as for a prelinked
- binary elf_machine_runtime_setup() is not called and hence
- neither has been initialized. */
- *(Elf32_Addr *) (D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_GOT)])
- + map->l_addr)
- = (Elf32_Addr) &_dl_tlsdesc_lazy_resolver;
- ((Elf32_Addr *) D_PTR (map, l_info[DT_PLTGOT]))[1]
- = (Elf32_Addr) map;
- }
- break;
-# endif /* RESOLVE_CONFLICT_FIND_MAP */
case R_ARM_PC24:
relocate_pc24 (map, value, reloc_addr, reloc->r_addend);
break;
@@ -699,11 +673,9 @@ elf_machine_lazy_rel (struct link_map *map,
(struct tlsdesc volatile *)reloc_addr;
/* The linker must have given us the parameter we need in the
- first GOT entry, and left the second one empty. The latter
- will have been preset by the prelinker if used though.
- We fill it with the resolver address. */
- assert (td->entry == 0
- || map->l_info[VALIDX (DT_GNU_PRELINKED)] != NULL);
+ first GOT entry, and left the second one empty. We fill the
+ latter with the resolver address. */
+ assert (td->entry == 0);
td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
+ map->l_addr);
}
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=91c5a366d8d398d2fc4542f961c93058a92ade6f
commit 91c5a366d8d398d2fc4542f961c93058a92ade6f
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Sep 27 18:14:21 2017 +0100
aarch64: Remove barriers from TLS descriptor functions
Remove ldar synchronization and most lazy TLSDESC initialization
related code.
* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
(_dl_tlsdesc_undefweak): Remove ldar.
(_dl_tlsdesc_dynamic): Likewise.
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
diff --git a/ChangeLog b/ChangeLog
index 0126d77..8499ada 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
+ DT_TLSDESC_GOT initialization.
+ * sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ (_dl_tlsdesc_undefweak): Remove ldar.
+ (_dl_tlsdesc_dynamic): Likewise.
+ * sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ * sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
+ (_dl_tlsdesc_resolve_hold_fixup): Likewise.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
binding and initialization non-lazily for R_AARCH64_TLSDESC.
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 837e281..acbd3e3 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -102,10 +102,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
}
}
- if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
- *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
- = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
-
return lazy;
}
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 3b2da62..70550c7 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -80,30 +80,6 @@ _dl_tlsdesc_return:
cfi_endproc
.size _dl_tlsdesc_return, .-_dl_tlsdesc_return
- /* Same as _dl_tlsdesc_return but with synchronization for
- lazy relocation.
- Prototype:
- _dl_tlsdesc_return_lazy (tlsdesc *) ;
- */
- .hidden _dl_tlsdesc_return_lazy
- .global _dl_tlsdesc_return_lazy
- .type _dl_tlsdesc_return_lazy,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_return_lazy:
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
- DELOUSE (0)
- ldar PTR_REG (zr), [x0]
- ldr PTR_REG (0), [x0, #PTR_SIZE]
- RET
- cfi_endproc
- .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
-
/* Handler for undefined weak TLS symbols.
Prototype:
_dl_tlsdesc_undefweak (tlsdesc *);
@@ -121,14 +97,7 @@ _dl_tlsdesc_return_lazy:
_dl_tlsdesc_undefweak:
str x1, [sp, #-16]!
cfi_adjust_cfa_offset (16)
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#8] here happens after the initialization of td->arg. */
DELOUSE (0)
- ldar PTR_REG (zr), [x0]
ldr PTR_REG (0), [x0, #PTR_SIZE]
mrs x1, tpidr_el0
sub PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -192,13 +161,6 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (x4, 32+24)
mrs x4, tpidr_el0
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
- ldar PTR_REG (zr), [x0]
ldr PTR_REG (1), [x0,#TLSDESC_ARG]
ldr PTR_REG (0), [x4,#TCBHEAD_DTV]
ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
@@ -276,168 +238,3 @@ _dl_tlsdesc_dynamic:
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- RELA relocations.
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_rela
- .global _dl_tlsdesc_resolve_rela
- .type _dl_tlsdesc_resolve_rela,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_resolve_rela:
-#define NSAVEXREGPAIRS 9
- /* The tlsdesc PLT entry pushes x2 and x3 to the stack. */
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x2, 0)
- cfi_rel_offset (x3, 8)
- stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
- cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- stp x1, x4, [sp, #32+16*0]
- stp x5, x6, [sp, #32+16*1]
- stp x7, x8, [sp, #32+16*2]
- stp x9, x10, [sp, #32+16*3]
- stp x11, x12, [sp, #32+16*4]
- stp x13, x14, [sp, #32+16*5]
- stp x15, x16, [sp, #32+16*6]
- stp x17, x18, [sp, #32+16*7]
- str x0, [sp, #32+16*8]
- cfi_rel_offset (x1, 32)
- cfi_rel_offset (x4, 32+8)
- cfi_rel_offset (x5, 32+16)
- cfi_rel_offset (x6, 32+16+8)
- cfi_rel_offset (x7, 32+16*2)
- cfi_rel_offset (x8, 32+16*2+8)
- cfi_rel_offset (x9, 32+16*3)
- cfi_rel_offset (x10, 32+16*3+8)
- cfi_rel_offset (x11, 32+16*4)
- cfi_rel_offset (x12, 32+16*4+8)
- cfi_rel_offset (x13, 32+16*5)
- cfi_rel_offset (x14, 32+16*5+8)
- cfi_rel_offset (x15, 32+16*6)
- cfi_rel_offset (x16, 32+16*6+8)
- cfi_rel_offset (x17, 32+16*7)
- cfi_rel_offset (x18, 32+16*7+8)
- cfi_rel_offset (x0, 32+16*8)
-
- SAVE_Q_REGISTERS
-
- DELOUSE (3)
- ldr PTR_REG (1), [x3, #PTR_SIZE]
- bl _dl_tlsdesc_resolve_rela_fixup
-
- RESTORE_Q_REGISTERS
-
- ldr x0, [sp, #32+16*8]
- DELOUSE (0)
- ldr PTR_REG (1), [x0]
- blr x1
-
- ldp x1, x4, [sp, #32+16*0]
- ldp x5, x6, [sp, #32+16*1]
- ldp x7, x8, [sp, #32+16*2]
- ldp x9, x10, [sp, #32+16*3]
- ldp x11, x12, [sp, #32+16*4]
- ldp x13, x14, [sp, #32+16*5]
- ldp x15, x16, [sp, #32+16*6]
- ldp x17, x18, [sp, #32+16*7]
- ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
- cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
- cfi_restore (x29)
- cfi_restore (x30)
- ldp x2, x3, [sp], #16
- cfi_adjust_cfa_offset (-16)
- RET
-#undef NSAVEXREGPAIRS
- cfi_endproc
- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
-
- /* This function is a placeholder for lazy resolving of TLS
- relocations. Once some thread starts resolving a TLS
- relocation, it sets up the TLS descriptor to use this
- resolver, such that other threads that would attempt to
- resolve it concurrently may skip the call to the original lazy
- resolver and go straight to a condition wait.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_hold
- .global _dl_tlsdesc_resolve_hold
- .type _dl_tlsdesc_resolve_hold,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_resolve_hold:
-#define NSAVEXREGPAIRS 10
-1:
- stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
- cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- stp x1, x2, [sp, #32+16*0]
- stp x3, x4, [sp, #32+16*1]
- stp x5, x6, [sp, #32+16*2]
- stp x7, x8, [sp, #32+16*3]
- stp x9, x10, [sp, #32+16*4]
- stp x11, x12, [sp, #32+16*5]
- stp x13, x14, [sp, #32+16*6]
- stp x15, x16, [sp, #32+16*7]
- stp x17, x18, [sp, #32+16*8]
- str x0, [sp, #32+16*9]
- cfi_rel_offset (x1, 32)
- cfi_rel_offset (x2, 32+8)
- cfi_rel_offset (x3, 32+16)
- cfi_rel_offset (x4, 32+16+8)
- cfi_rel_offset (x5, 32+16*2)
- cfi_rel_offset (x6, 32+16*2+8)
- cfi_rel_offset (x7, 32+16*3)
- cfi_rel_offset (x8, 32+16*3+8)
- cfi_rel_offset (x9, 32+16*4)
- cfi_rel_offset (x10, 32+16*4+8)
- cfi_rel_offset (x11, 32+16*5)
- cfi_rel_offset (x12, 32+16*5+8)
- cfi_rel_offset (x13, 32+16*6)
- cfi_rel_offset (x14, 32+16*6+8)
- cfi_rel_offset (x15, 32+16*7)
- cfi_rel_offset (x16, 32+16*7+8)
- cfi_rel_offset (x17, 32+16*8)
- cfi_rel_offset (x18, 32+16*8+8)
- cfi_rel_offset (x0, 32+16*9)
-
- SAVE_Q_REGISTERS
-
- adr x1, 1b
- bl _dl_tlsdesc_resolve_hold_fixup
-
- RESTORE_Q_REGISTERS
-
- ldr x0, [sp, #32+16*9]
- DELOUSE (0)
- ldr PTR_REG (1), [x0]
- blr x1
-
- ldp x1, x2, [sp, #32+16*0]
- ldp x3, x4, [sp, #32+16*1]
- ldp x5, x6, [sp, #32+16*2]
- ldp x7, x8, [sp, #32+16*3]
- ldp x9, x10, [sp, #32+16*4]
- ldp x11, x12, [sp, #32+16*5]
- ldp x13, x14, [sp, #32+16*6]
- ldp x15, x16, [sp, #32+16*7]
- ldp x17, x18, [sp, #32+16*8]
- ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
- cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
- cfi_restore (x29)
- cfi_restore (x30)
- RET
- cfi_endproc
- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
-#undef NSAVEXREGPAIRS
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 66ec0de..25b5b64 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,17 +46,8 @@ extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_return_lazy (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
_dl_tlsdesc_undefweak (struct tlsdesc *);
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_rela (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_hold (struct tlsdesc *);
-
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4c8a5a8..bd1356f 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -18,137 +18,12 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <link.h>
#include <ldsodefs.h>
-#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
-#include <atomic.h>
-
-/* The following functions take an entry_check_offset argument. It's
- computed by the caller as an offset between its entry point and the
- call site, such that by adding the built-in return address that is
- implicitly passed to the function with this offset, we can easily
- obtain the caller's entry point to compare with the entry point
- given in the TLS descriptor. If it's changed, we want to return
- immediately. */
-
-/* This function is used to lazily resolve TLS_DESC RELA relocations.
- The argument location is used to hold a pointer to the relocation. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
-{
- const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
-
- /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
- initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
- callers will return and retry calling td->entry. The updated td->entry
- synchronizes with the single writer so all read accesses here can use
- relaxed order. */
- if (_dl_tlsdesc_resolve_early_return_p
- (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
- return;
-
- /* The code below was borrowed from _dl_fixup(),
- except for checking for STB_LOCAL. */
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
- lookup_t result;
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- }
-
- if (!sym)
- {
- atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_undefweak. */
- atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
- }
- else
- {
-# ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-# else
- if (!TRY_STATIC_TLS (l, result))
- {
- void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
- + reloc->r_addend);
- atomic_store_relaxed (&td->arg, p);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_dynamic. */
- atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
- }
- else
-# endif
- {
- void *p = (void*) (sym->st_value + result->l_tls_offset
- + reloc->r_addend);
- atomic_store_relaxed (&td->arg, p);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_return_lazy. */
- atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
- }
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
- complete the lazy relocation. Once another thread wins the race to
- relocate a TLS descriptor, it sets the descriptor up such that this
- function is called to wait until the resolver releases the
- lock. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
-{
- /* Maybe we're lucky and can return early. */
- if (caller != atomic_load_relaxed (&td->entry))
- return;
-
- /* Locking here will stop execution until the running resolver runs
- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
- FIXME: We'd be better off waiting on a condition variable, such
- that we didn't have to hold the lock throughout the relocation
- processing. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Sep 27 16:55:14 2017 +0100
aarch64: Disable lazy symbol binding of TLSDESC
Always do TLS descriptor initialization at load time during relocation
processing to avoid barriers at every TLS access. In non-dlopened shared
libraries the overhead of tls access vs static global access is > 3x
bigger when lazy initialization is used (_dl_tlsdesc_return_lazy)
compared to bind-now (_dl_tlsdesc_return) so the barriers dominate tls
access performance.
TLSDESC relocs are in DT_JMPREL which are processed at load time using
elf_machine_lazy_rel which is only supposed to do lightweight
initialization using the DT_TLSDESC_PLT trampoline (the trampoline code
jumps to the entry point in DT_TLSDESC_GOT which does the lazy tlsdesc
initialization at runtime). This patch changes elf_machine_lazy_rel
in aarch64 to do the symbol binding and initialization as if DF_BIND_NOW
was set, so the non-lazy code path of elf/do-rel.h was replicated.
The static linker could be changed to emit TLSDESC relocs in DT_REL*,
which are processed non-lazily, but the goal of this patch is to always
guarantee bind-now semantics, even if the binary was produced with an
old linker, so the barriers can be dropped in tls descriptor functions.
After this change the synchronizing ldar instructions can be dropped
as well as the lazy initialization machinery including the DT_TLSDESC_GOT
setup.
I believe this should be done on all targets, including ones where no
barrier is needed for lazy initialization. There is very little gain in
optimizing for large number of symbolic tlsdesc relocations which is an
extremely uncommon case. And currently the tlsdesc entries are only
readonly protected with -z now and some hardenings against writable
JUMPSLOT relocs don't work for TLSDESC so they are a security hazard.
(But to fix that the static linker has to be changed.)
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
binding and initialization non-lazily for R_AARCH64_TLSDESC.
diff --git a/ChangeLog b/ChangeLog
index 44e18dd..0126d77 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
+ binding and initialization non-lazily for R_AARCH64_TLSDESC.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
(_dl_tlsdesc_wake_up_held_fixups): Likewise.
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index e765612..837e281 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -399,12 +399,21 @@ elf_machine_lazy_rel (struct link_map *map,
}
else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
{
- struct tlsdesc volatile *td =
- (struct tlsdesc volatile *)reloc_addr;
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
- td->arg = (void*)reloc;
- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
- + map->l_addr);
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum =
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
+
+ /* Always initialize TLS descriptors completely, because lazy
+ initialization requires synchronization at every TLS access. */
+ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
}
else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE)))
{
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0cc5b022f817eeaa81735ae58717b5dabae92941
commit 0cc5b022f817eeaa81735ae58717b5dabae92941
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Mon Oct 23 12:15:40 2017 +0100
Mark lazy tlsdesc helper functions unused to avoid warnings
These static functions are not needed if a target does not do lazy
tlsdesc initialization.
* elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
(_dl_tlsdesc_wake_up_held_fixups): Likewise.
diff --git a/ChangeLog b/ChangeLog
index 626828e..44e18dd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
+ * elf/tlsdeschtab.h (_dl_tls_resolve_early_return_p): Mark unused.
+ (_dl_tlsdesc_wake_up_held_fixups): Likewise.
+
2017-11-02 Joseph Myers <joseph@codesourcery.com>
* wcsmbs/wchar.h [__HAVE_FLOAT16 && __USE_GNU] (wcstof16):
diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h
index ad3001d..8796318 100644
--- a/elf/tlsdeschtab.h
+++ b/elf/tlsdeschtab.h
@@ -137,6 +137,7 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
avoid introducing such dependencies. */
static int
+__attribute__ ((unused))
_dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
{
if (caller != atomic_load_relaxed (&td->entry))
@@ -155,6 +156,7 @@ _dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
}
static void
+__attribute__ ((unused))
_dl_tlsdesc_wake_up_held_fixups (void)
{
__rtld_lock_unlock_recursive (GL(dl_load_lock));
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 59 ++++++++
elf/tlsdeschtab.h | 2 +
sysdeps/aarch64/dl-machine.h | 23 ++--
sysdeps/aarch64/dl-tlsdesc.S | 308 +++++++-----------------------------------
sysdeps/aarch64/dl-tlsdesc.h | 9 --
sysdeps/aarch64/tlsdesc.c | 127 +-----------------
sysdeps/arm/dl-machine.h | 59 +++------
sysdeps/arm/dl-tlsdesc.S | 84 ------------
sysdeps/arm/dl-tlsdesc.h | 4 +-
sysdeps/arm/tlsdesc.c | 119 +----------------
10 files changed, 145 insertions(+), 649 deletions(-)
hooks/post-receive
--
GNU C Library master sources