This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: powerpc _tls_get_addr call optimization
- From: Alan Modra <amodra at gmail dot com>
- To: libc-alpha at sourceware dot org
- Date: Fri, 12 Feb 2010 23:46:07 +1030
- Subject: Re: powerpc _tls_get_addr call optimization
- References: <20090330075541.GM30645@bubble.grove.modra.org>
This is a repost of
http://sourceware.org/ml/libc-alpha/2009-03/msg00053.html fixing a
problem Roland pointed out (thanks!). ld and glibc now co-operate in
this optimization. ld won't generate the fancy __tls_get_addr call
stub unless it sees a magic glibc symbol, __tls_get_addr_opt, and
glibc will only optimize the tls_index module/offset pair if an
executable or shared lib has a DT_PPC_TLSOPT dynamic tag.
The elf/tst-tls-dlinfo regression is no longer present due to a quirk
in the way ld operates. Unlike the usual case, where libc.so is a
linker script that also pulls in ld.so, most of the tls test executables
and libraries are not linked against ld.so. When linking executables, ld looks at
DT_NEEDED entries of libraries to resolve undefined symbols. It
doesn't do this when building shared libraries. The result is that
when linking most of the test libraries, ld does not see a defined
__tls_get_addr_opt, and thus doesn't generate the optimized
__tls_get_addr call stub. However, when linking test executables that
call __tls_get_addr, ld will pull in ld.so and thus do the
optimization. I think that's just fine. :)
See my original post for the description of the dl-open.c bug and fix.
* elf/dl-open.c (dl_open_worker): Delay running _dl_update_slotinfo
until all loaded modules have called _dl_add_to_slotinfo, and then
only call it once.
* elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_TLSOPT): Define.
(R_PPC64_TLSGD, R_PPC64_TLSLD, DT_PPC64_TLSOPT): Define.
(DT_PPC_NUM, DT_PPC64_NUM): Increment.
* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize
TLS descriptors.
* sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise.
* sysdeps/powerpc/dl-tls.c: New file.
* sysdeps/powerpc/Versions: Add __tls_get_addr_opt.
* Versions.def: Add ld GLIBC_2.12.
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 754a263..5e2ed9c 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -375,7 +375,7 @@ dl_open_worker (void *a)
/* If the file is not loaded now as a dependency, add the search
list of the newly loaded object to the scope. */
- bool any_tls = false;
+ struct link_map *any_tls = NULL;
for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
{
struct link_map *imap = new->l_searchlist.r_list[i];
@@ -467,27 +467,37 @@ dl_open_worker (void *a)
if (! RTLD_SINGLE_THREAD_P && imap->l_tls_modid > DTV_SURPLUS)
_dl_signal_error (0, "dlopen", NULL, N_("\
cannot load any more object with static TLS"));
+ }
- imap->l_need_tls_init = 0;
+ any_tls = imap;
+ }
+ }
+
+ if (any_tls != NULL)
+ {
#ifdef SHARED
- /* Update the slot information data for at least the
- generation of the DSO we are allocating data for. */
- _dl_update_slotinfo (imap->l_tls_modid);
+ /* Update the slot information data for at least the
+ generation of the DSO we are allocating data for. */
+ _dl_update_slotinfo (any_tls->l_tls_modid);
#endif
+ for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
+ {
+ struct link_map *imap = new->l_searchlist.r_list[i];
+
+ if (!imap->l_init_called && imap->l_need_tls_init)
+ {
+ imap->l_need_tls_init = 0;
GL(dl_init_static_tls) (imap);
assert (imap->l_need_tls_init == 0);
}
-
- /* We have to bump the generation counter. */
- any_tls = true;
}
- }
- /* Bump the generation number if necessary. */
- if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0))
- _dl_fatal_printf (N_("\
+ /* Bump the generation number. */
+ if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+ _dl_fatal_printf (N_("\
TLS generation counter wrapped! Please report this."));
+ }
/* Run the initializer functions of new objects. */
_dl_init (new, args->argc, args->argv, args->env);
diff --git a/elf/elf.h b/elf/elf.h
index 8af7c17..5de0a0c 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -2046,6 +2046,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */
#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */
#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */
+#define R_PPC_TLSGD 95 /* none (sym+add)@tlsgd */
+#define R_PPC_TLSLD 96 /* none (sym+add)@tlsld */
/* The remaining relocs are from the Embedded ELF ABI, and are not
in the SVR4 ELF ABI. */
@@ -2089,7 +2091,8 @@ typedef Elf32_Addr Elf32_Conflict;
/* PowerPC specific values for the Dyn d_tag field. */
#define DT_PPC_GOT (DT_LOPROC + 0)
-#define DT_PPC_NUM 1
+#define DT_PPC_TLSOPT (DT_LOPROC + 1)
+#define DT_PPC_NUM 2
/* PowerPC64 relocations defined by the ABIs */
#define R_PPC64_NONE R_PPC_NONE
@@ -2202,6 +2205,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */
#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */
#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD 107 /* none (sym+add)@tlsgd */
+#define R_PPC64_TLSLD 108 /* none (sym+add)@tlsld */
/* GNU extension to support local ifunc. */
#define R_PPC64_JMP_IREL 247
@@ -2215,7 +2220,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define DT_PPC64_GLINK (DT_LOPROC + 0)
#define DT_PPC64_OPD (DT_LOPROC + 1)
#define DT_PPC64_OPDSZ (DT_LOPROC + 2)
-#define DT_PPC64_NUM 3
+#define DT_PPC64_TLSOPT (DT_LOPROC + 3)
+#define DT_PPC64_NUM 4
/* ARM specific declarations */
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index 6f8d0f5..df5d377 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -343,6 +343,31 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
# endif
case R_PPC_DTPMOD32:
+ if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+ if (!NOT_BOOTSTRAP)
+ {
+ reloc_addr[0] = 0;
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ break;
+ }
+ else if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ reloc_addr[0] = 0;
+ /* Set up for local dynamic. */
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ break;
+ }
+ }
+ }
if (!NOT_BOOTSTRAP)
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@@ -352,6 +377,26 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
*reloc_addr = sym_map->l_tls_modid;
break;
case R_PPC_DTPREL32:
+ if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+ if (!NOT_BOOTSTRAP)
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ break;
+ }
+ else if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ break;
+ }
+ }
+ }
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
if (NOT_BOOTSTRAP && sym_map != NULL)
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index 0088858..9fcdb31 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -613,6 +613,31 @@ elf_machine_rela (struct link_map *map,
#if !defined RTLD_BOOTSTRAP || USE___THREAD
case R_PPC64_DTPMOD64:
+ if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+ reloc_addr[0] = 0;
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ return;
+# else
+ if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ reloc_addr[0] = 0;
+ /* Set up for local dynamic. */
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ return;
+ }
+ }
+# endif
+ }
# ifdef RTLD_BOOTSTRAP
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@@ -625,6 +650,26 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_DTPREL64:
+ if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ return;
+# else
+ if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ return;
+ }
+ }
+# endif
+ }
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
# ifndef RTLD_BOOTSTRAP
diff --git a/sysdeps/powerpc/dl-tls.c b/sysdeps/powerpc/dl-tls.c
new file mode 100644
index 0000000..f61e91c
--- /dev/null
+++ b/sysdeps/powerpc/dl-tls.c
@@ -0,0 +1,24 @@
+/* Thread-local storage handling in the ELF dynamic linker. PowerPC version.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "elf/dl-tls.c"
+
+#ifdef SHARED
+strong_alias(__tls_get_addr, __tls_get_addr_opt)
+#endif
diff --git a/sysdeps/powerpc/Versions b/sysdeps/powerpc/Versions
index 47c2c3e..b032fe9 100644
--- a/sysdeps/powerpc/Versions
+++ b/sysdeps/powerpc/Versions
@@ -15,3 +15,9 @@ libc {
__vmx__libc_longjmp; __vmx__libc_siglongjmp;
}
}
+
+ld {
+ GLIBC_2.12 {
+ __tls_get_addr_opt;
+ }
+}
diff --git a/Versions.def b/Versions.def
index eab006b..1b3e101 100644
--- a/Versions.def
+++ b/Versions.def
@@ -119,6 +119,7 @@ ld {
GLIBC_2.1
GLIBC_2.3
GLIBC_2.4
+ GLIBC_2.12
GLIBC_PRIVATE
}
libthread_db {
--
Alan Modra
Australia Development Lab, IBM