This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] x86-64: Align the stack in __tls_get_addr [BZ #21609]


On Mon, Jul 3, 2017 at 9:47 PM, Florian Weimer <fweimer@redhat.com> wrote:
> On 07/03/2017 10:25 PM, H.J. Lu wrote:
>> I prefer to let GCC realign the stack for us.  What do you think?
>
> I tried that first, but it does not work with all the GCC versions we

That is true.

> support.  We would have to add a configure check and hope that older GCC
> versions never generate code which needs an aligned stack.
>
> Furthermore, the code GCC generates for stack realignment is really bad,

GCC generates very good code for stack realignment when
-maccumulate-outgoing-args is used.

> and __tls_get_addr appears to be quite performance-critical because not
> all callers cache the result.
>

Some comments on the patch at

https://sourceware.org/ml/libc-alpha/2017-06/msg00922.html

# define __tls_get_addr __tls_get_addr_default
+# include <elf/dl-tls.c>
+
+# undef __tls_get_addr_default
              ^^^^^^^ Shouldn't it be __tls_get_addr?

-#include <stdint.h>
-
 #ifndef _X86_64_DL_TLSDESC_H
-# define _X86_64_DL_TLSDESC_H 1
+#define _X86_64_DL_TLSDESC_H
+
+#include <stdint.h>
+#include <dl-tls.h>

 /* Type used to represent a TLS descriptor in the GOT.  */
 struct tlsdesc
@@ -39,12 +40,6 @@ struct tlsdesc
     };
 };

-typedef struct dl_tls_index
-{
-  uint64_t ti_module;
-  uint64_t ti_offset;
-} tls_index;
-
 /* Type used as the argument in a TLS descriptor for a symbol that
    needs dynamic TLS offsets.  */
 struct tlsdesc_dynamic_arg
@@ -59,12 +54,12 @@ extern ptrdiff_t attribute_hidden
   _dl_tlsdesc_resolve_rela(struct tlsdesc *on_rax),
   _dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax);

-# ifdef SHARED
+#ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
        size_t ti_offset)
   internal_function attribute_hidden;

 extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *);
-# endif
-
 #endif
+
+#endif /* _X86_64_DL_TLSDESC_H */

Is this sysdeps/x86_64/dl-tlsdesc.h change related to this?

__tls_get_addr_compat:
+ .type __tls_get_addr_compat,@function
+ .global __tls_get_addr_compat
+ strong_alias (__tls_get_addr_compat, __tls_get_addr)

We can use ENTRY/END here.  Why do we need __tls_get_addr_compat?
Can we just have __tls_get_addr?

Since we are talking performance here, we should add __tls_get_addr_slow
to only handle slow paths.

Here is the patch which implements those.  It is tested on x86-64
and x32.

-- 
H.J.
From eb4dd17f2f8222278567dc085071bd85c6669cc4 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 3 Jul 2017 13:16:57 -0700
Subject: [PATCH] x86-64: Align the stack in __tls_get_addr [BZ #21609]

This change forces realignment of the stack pointer in __tls_get_addr, so
that binaries compiled by GCCs older than GCC 4.9:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066

continue to work even if vector instructions are used in glibc which
require the ABI stack realignment.

__tls_get_addr_slow is added to handle the slow paths in the default
implementation of__tls_get_addr in elf/dl-tls.c.  The new __tls_get_addr
calls __tls_get_addr_slow after realigning the stack.  Internal calls
within ld.so go directly to the default implementation of __tls_get_addr
because they do not need stack realignment.

2017-07-04  Florian Weimer  <fweimer@redhat.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	[BZ #21609]
	* sysdeps/x86_64/Makefile (sysdep-dl-routines): Add tls_get_addr.
	(gen-as-const-headers): Add rtld-offsets.sym.
	* sysdeps/x86_64/dl-tls.c: New file.
	* sysdeps/x86_64/rtld-offsets.sym: Likwise.
	* sysdeps/x86_64/tls_get_addr.S: Likewise.
	* sysdeps/x86_64/dl-tls.h: Add multiple inclusion guards.
	* sysdeps/x86_64/tlsdesc.sym (TI_MODULE_OFFSET): New.
	(TI_OFFSET_OFFSET): Likwise.
---
 sysdeps/x86_64/Makefile         |  4 +--
 sysdeps/x86_64/dl-tls.c         | 53 +++++++++++++++++++++++++++++++++++
 sysdeps/x86_64/dl-tls.h         |  5 ++++
 sysdeps/x86_64/rtld-offsets.sym |  6 ++++
 sysdeps/x86_64/tls_get_addr.S   | 61 +++++++++++++++++++++++++++++++++++++++++
 sysdeps/x86_64/tlsdesc.sym      |  3 ++
 6 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/x86_64/dl-tls.c
 create mode 100644 sysdeps/x86_64/rtld-offsets.sym
 create mode 100644 sysdeps/x86_64/tls_get_addr.S

diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 5075c91..132470d 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -27,7 +27,7 @@ ifeq ($(subdir),elf)
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
 		   -mno-mmx)
 
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
 
 tests += ifuncmain8
 modules-names += ifuncmod8
@@ -120,5 +120,5 @@ endif
 endif
 
 ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
 endif
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
new file mode 100644
index 0000000..567224e
--- /dev/null
+++ b/sysdeps/x86_64/dl-tls.c
@@ -0,0 +1,53 @@
+/* Thread-local storage handling in the ELF dynamic linker.  x86-64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+/* Work around GCC PR58066, due to which __tls_get_addr may be called
+   with an unaligned stack.  The compat implementation is in
+   tls_get_addr-compat.S.  */
+
+# include <dl-tls.h>
+
+/* Define __tls_get_addr within elf/dl-tls.c under a different
+   name.  */
+extern __typeof__ (__tls_get_addr) __tls_get_addr_default;
+
+# define __tls_get_addr __tls_get_addr_default
+# include <elf/dl-tls.c>
+# undef __tls_get_addr
+
+hidden_ver (__tls_get_addr_default, __tls_get_addr)
+
+/* Only handle slow paths for __tls_get_addr.  */
+attribute_hidden
+void *
+__tls_get_addr_slow (GET_ADDR_ARGS)
+{
+  dtv_t *dtv = THREAD_DTV ();
+
+  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+    return update_get_addr (GET_ADDR_PARAM);
+
+  return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
+}
+#else
+
+/* No compatibility symbol needed.  */
+# include <elf/dl-tls.c>
+
+#endif
diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h
index 4a59d2a..c2fb56c 100644
--- a/sysdeps/x86_64/dl-tls.h
+++ b/sysdeps/x86_64/dl-tls.h
@@ -16,6 +16,9 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef _X86_64_DL_TLS_H
+#define _X86_64_DL_TLS_H
+
 #include <stdint.h>
 
 /* Type used for the representation of TLS information in the GOT.  */
@@ -27,3 +30,5 @@ typedef struct dl_tls_index
 
 
 extern void *__tls_get_addr (tls_index *ti);
+
+#endif /* _X86_64_DL_TLS_H */
diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym
new file mode 100644
index 0000000..fd41b51
--- /dev/null
+++ b/sysdeps/x86_64/rtld-offsets.sym
@@ -0,0 +1,6 @@
+#define SHARED
+#include <ldsodefs.h>
+
+--
+
+GL_TLS_GENERATION_OFFSET        offsetof (struct rtld_global, _dl_tls_generation)
diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S
new file mode 100644
index 0000000..9d38fb3
--- /dev/null
+++ b/sysdeps/x86_64/tls_get_addr.S
@@ -0,0 +1,61 @@
+/* Stack-aligning implementation of __tls_get_addr.  x86-64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+
+# include <sysdep.h>
+# include "tlsdesc.h"
+# include "rtld-offsets.h"
+
+/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c.  This function
+   call __tls_get_addr_slow on both slow paths.  It realigns the stack
+   before the call to work around GCC PR58066.  */
+
+ENTRY (__tls_get_addr)
+	mov 	%fs:DTV_OFFSET, %RDX_LP
+	mov	GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP
+	/* GL(dl_tls_generation) == dtv[0].counter */
+	cmp	%RAX_LP, (%rdx)
+	jne	1f
+	mov	TI_MODULE_OFFSET(%rdi), %RAX_LP
+	/* dtv[ti->ti_module] */
+# ifdef __LP64__
+	salq	$4, %rax
+	movq	(%rdx,%rax), %rax
+# else
+	movl	(%rdx,%rax, 8), %eax
+# endif
+	cmp	$-1, %RAX_LP
+	je	1f
+	add	TI_OFFSET_OFFSET(%rdi), %RAX_LP
+	ret
+1:
+	/* On the slow path, align the stack.  */
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (%rbp, -16)
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (%rbp)
+	and	$-16, %RSP_LP
+	call	__tls_get_addr_slow
+	mov	%RBP_LP, %RSP_LP
+	popq	%rbp
+	cfi_def_cfa (%rsp, 8)
+	ret
+END (__tls_get_addr)
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym
index 3385497..fc897ab 100644
--- a/sysdeps/x86_64/tlsdesc.sym
+++ b/sysdeps/x86_64/tlsdesc.sym
@@ -15,3 +15,6 @@ TLSDESC_ARG			offsetof(struct tlsdesc, arg)
 TLSDESC_GEN_COUNT		offsetof(struct tlsdesc_dynamic_arg, gen_count)
 TLSDESC_MODID			offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
 TLSDESC_MODOFF			offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+
+TI_MODULE_OFFSET 		offsetof(tls_index, ti_module)
+TI_OFFSET_OFFSET 		offsetof(tls_index, ti_offset)
-- 
2.9.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]