This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PowerPC64 ELFv2 PPC64_OPT_LOCALENTRY


ELFv2 functions with localentry:0 are those with a single entry point,
i.e. global entry == local entry, that have no requirement on r2 or
r12 and guarantee r2 is unchanged on return.  Such an external
function can be called via the PLT without saving r2 before the call
or restoring it on return, avoiding a common load-hit-store for small
functions.  The optimization is attractive: the TOC pointer
load-hit-store is a major reason why calls to small functions that
need no register saves (or, with shrink-wrapping, no register saves
on a fast path) are slow on powerpc64le.

To be safe, this optimization needs ld.so support to check that the
function implementation found at run time matches the one seen at
link time.  If a function in a shared library with non-zero st_other
localentry bits is called without saving and restoring r2, r2 will be
trashed on return, leading to segfaults.  For that reason the
optimization does not happen for weak functions, since a weak
definition is a fairly solid hint that the function will likely be
overridden.  I'm also not enabling the optimization by default unless
glibc-2.26 is detected, which should have the ld.so checks
implemented.

bfd/
	* elf64-ppc.c (struct ppc_link_hash_table): Add has_plt_localentry0.
	(ppc64_elf_merge_symbol_attribute): Merge localentry bits from
	dynamic objects.
	(is_elfv2_localentry0): New function.
	(ppc64_elf_tls_setup): Default params->plt_localentry0.
	(plt_stub_size): Adjust size for tls_get_addr_opt stub.
	(build_tls_get_addr_stub): Use a simpler stub when r2 is not saved.
	(ppc64_elf_size_stubs): Leave stub_type as ppc_stub_plt_call for
	optimized localentry:0 stubs.
	(ppc64_elf_build_stubs): Save r2 in ELFv2 __glink_PLTresolve.
	(ppc64_elf_relocate_section): Leave nop unchanged for optimized
	localentry:0 stubs.
	(ppc64_elf_finish_dynamic_sections): Set PPC64_OPT_LOCALENTRY in
	DT_PPC64_OPT.
	* elf64-ppc.h (struct ppc64_elf_params): Add plt_localentry0.
include/
	* elf/ppc64.h (PPC64_OPT_LOCALENTRY): Define.
ld/
	* emultempl/ppc64elf.em (params): Init plt_localentry0 field.
	(enum ppc64_opt): New, replacing OPTION_* defines.  Add
	OPTION_PLT_LOCALENTRY and OPTION_NO_PLT_LOCALENTRY.
	(PARSE_AND_LIST_*): Support --plt-localentry and --no-plt-localentry.
	* testsuite/ld-powerpc/elfv2so.d: Update.
	* testsuite/ld-powerpc/powerpc.exp (TLS opt 5): Use --no-plt-localentry.
	* testsuite/ld-powerpc/tlsopt5.d: Update.

diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c
index 948de12..42abb96 100644
--- a/bfd/elf64-ppc.c
+++ b/bfd/elf64-ppc.c
@@ -4118,6 +4118,9 @@ struct ppc_link_hash_table
   unsigned int local_ifunc_resolver:1;
   unsigned int maybe_local_ifunc_resolver:1;
 
+  /* Whether plt calls for ELFv2 localentry:0 funcs have been optimized.  */
+  unsigned int has_plt_localentry0:1;
+
   /* Incremented every time we size stubs.  */
   unsigned int stub_iteration;
 
@@ -5005,7 +5008,7 @@ ppc64_elf_merge_symbol_attribute (struct elf_link_hash_entry *h,
 				  bfd_boolean definition,
 				  bfd_boolean dynamic)
 {
-  if (definition && !dynamic)
+  if (definition && (!dynamic || !h->def_regular))
     h->other = ((isym->st_other & ~ELF_ST_VISIBILITY (-1))
 		| ELF_ST_VISIBILITY (h->other));
 }
@@ -6318,6 +6321,21 @@ ppc64_elf_maybe_function_sym (const asymbol *sym, asection *sec,
   return size;
 }
 
+/* Return true if symbol is a strong function defined in an ELFv2
+   object with st_other localentry bits of zero, ie. its local entry
+   point coincides with its global entry point.  */
+
+static bfd_boolean
+is_elfv2_localentry0 (struct elf_link_hash_entry *h)
+{
+  return (h != NULL
+	  && h->type == STT_FUNC
+	  && h->root.type == bfd_link_hash_defined
+	  && (STO_PPC64_LOCAL_MASK & h->other) == 0
+	  && is_ppc64_elf (h->root.u.def.section->owner)
+	  && abiversion (h->root.u.def.section->owner) >= 2);
+}
+
 /* Return true if symbol is defined in a regular object file.  */
 
 static bfd_boolean
@@ -8328,6 +8346,11 @@ ppc64_elf_tls_setup (struct bfd_link_info *info)
   else if (!htab->do_multi_toc)
     htab->params->no_multi_toc = 1;
 
+  if (htab->params->plt_localentry0 < 0)
+    htab->params->plt_localentry0
+      = elf_link_hash_lookup (&htab->elf, "GLIBC_2.26",
+			      FALSE, FALSE, FALSE) != NULL;
+
   htab->tls_get_addr = ((struct ppc_link_hash_entry *)
 			elf_link_hash_lookup (&htab->elf, ".__tls_get_addr",
 					      FALSE, FALSE, TRUE));
@@ -10548,7 +10571,12 @@ plt_stub_size (struct ppc_link_hash_table *htab,
       && (stub_entry->h == htab->tls_get_addr_fd
 	  || stub_entry->h == htab->tls_get_addr)
       && htab->params->tls_get_addr_opt)
-    size += 13 * 4;
+    {
+      size += 7 * 4;
+      if (ALWAYS_EMIT_R2SAVE
+	  || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+	size += 6 * 4;
+    }
   return size;
 }
 
@@ -10775,11 +10803,17 @@ build_tls_get_addr_stub (struct ppc_link_hash_table *htab,
   bfd_put_32 (obfd, ADD_R3_R12_R13, p),		p += 4;
   bfd_put_32 (obfd, BEQLR, p),			p += 4;
   bfd_put_32 (obfd, MR_R3_R0, p),		p += 4;
+  if (r != NULL)
+    r[0].r_offset += 7 * 4;
+  if (!ALWAYS_EMIT_R2SAVE
+      && stub_entry->stub_type != ppc_stub_plt_call_r2save)
+    return build_plt_stub (htab, stub_entry, p, offset, r);
+
   bfd_put_32 (obfd, MFLR_R11, p),		p += 4;
   bfd_put_32 (obfd, STD_R11_0R1 + STK_LINKER (htab), p), p += 4;
 
   if (r != NULL)
-    r[0].r_offset += 9 * 4;
+    r[0].r_offset += 2 * 4;
   p = build_plt_stub (htab, stub_entry, p, offset, r);
   bfd_put_32 (obfd, BCTRL, p - 4);
 
@@ -12598,17 +12632,23 @@ ppc64_elf_size_stubs (struct bfd_link_info *info)
 			continue;
 		    }
 
-		  if (stub_type == ppc_stub_plt_call
-		      && irela + 1 < irelaend
-		      && irela[1].r_offset == irela->r_offset + 4
-		      && ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE)
+		  if (stub_type == ppc_stub_plt_call)
 		    {
-		      if (!tocsave_find (htab, INSERT,
-					 &local_syms, irela + 1, input_bfd))
-			goto error_ret_free_internal;
+		      if (irela + 1 < irelaend
+			  && irela[1].r_offset == irela->r_offset + 4
+			  && ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE)
+			{
+			  if (!tocsave_find (htab, INSERT,
+					     &local_syms, irela + 1, input_bfd))
+			    goto error_ret_free_internal;
+			}
+		      else if (!htab->opd_abi
+			       && htab->params->plt_localentry0 != 0
+			       && is_elfv2_localentry0 (&hash->elf))
+			htab->has_plt_localentry0 = 1;
+		      else
+			stub_type = ppc_stub_plt_call_r2save;
 		    }
-		  else if (stub_type == ppc_stub_plt_call)
-		    stub_type = ppc_stub_plt_call_r2save;
 
 		  /* Support for grouping stub sections.  */
 		  id_sec = htab->sec_info[section->id].u.group->link_sec;
@@ -13160,6 +13200,8 @@ ppc64_elf_build_stubs (struct bfd_link_info *info,
 	  p += 4;
 	  bfd_put_32 (htab->glink->owner, MFLR_R11, p);
 	  p += 4;
+	  bfd_put_32 (htab->glink->owner, STD_R2_0R1 + 24, p);
+	  p += 4;
 	  bfd_put_32 (htab->glink->owner, LD_R2_0R11 | (-16 & 0xfffc), p);
 	  p += 4;
 	  bfd_put_32 (htab->glink->owner, MTLR_R0, p);
@@ -14170,7 +14212,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
 	    {
 	      bfd_boolean can_plt_call = FALSE;
 
-	      /* All of these stubs will modify r2, so there must be a
+	      /* All of these stubs may modify r2, so there must be a
 		 branch and link followed by a nop.  The nop is
 		 replaced by an insn to restore r2.  */
 	      if (rel->r_offset + 8 <= input_section->size)
@@ -14195,6 +14237,13 @@ ppc64_elf_relocate_section (bfd *output_bfd,
 			    {
 			      /* Special stub used, leave nop alone.  */
 			    }
+			  else if (stub_entry->stub_type == ppc_stub_plt_call
+				   && !htab->opd_abi
+				   && htab->params->plt_localentry0 != 0
+				   && is_elfv2_localentry0 (&h->elf))
+			    {
+			      /* The function doesn't use or change r2.  */
+			    }
 			  else
 			    bfd_put_32 (input_bfd,
 					LD_R2_0R1 + STK_TOC (htab),
@@ -15636,6 +15685,8 @@ ppc64_elf_finish_dynamic_sections (bfd *output_bfd,
 	    case DT_PPC64_OPT:
 	      if (htab->do_multi_toc && htab->multi_toc_needed)
 		dyn.d_un.d_val |= PPC64_OPT_MULTI_TOC;
+	      if (htab->has_plt_localentry0)
+		dyn.d_un.d_val |= PPC64_OPT_LOCALENTRY;
 	      break;
 
 	    case DT_PPC64_OPDSZ:
diff --git a/bfd/elf64-ppc.h b/bfd/elf64-ppc.h
index 4609679..ca20fe4 100644
--- a/bfd/elf64-ppc.h
+++ b/bfd/elf64-ppc.h
@@ -48,6 +48,9 @@ struct ppc64_elf_params
   /* Set if individual PLT call stubs should be aligned.  */
   int plt_stub_align;
 
+  /* Set if PLT call stubs for localentry:0 functions should omit r2 save.  */
+  int plt_localentry0;
+
   /* Whether to canonicalize .opd so that there are no overlapping
      .opd entries.  */
   int non_overlapping_opd;
diff --git a/include/elf/ppc64.h b/include/elf/ppc64.h
index 43090dd..145e9b6 100644
--- a/include/elf/ppc64.h
+++ b/include/elf/ppc64.h
@@ -251,5 +251,6 @@ ppc64_encode_local_entry(unsigned int val)
 #define DT_PPC64_OPT		(DT_LOPROC + 3)
 #define PPC64_OPT_TLS		1
 #define PPC64_OPT_MULTI_TOC	2
+#define PPC64_OPT_LOCALENTRY	4
 
 #endif /* _ELF_PPC64_H */
diff --git a/ld/emultempl/ppc64elf.em b/ld/emultempl/ppc64elf.em
index f1d5274..58cb798 100644
--- a/ld/emultempl/ppc64elf.em
+++ b/ld/emultempl/ppc64elf.em
@@ -38,7 +38,7 @@ static struct ppc64_elf_params params = { NULL,
 					  &ppc_layout_sections_again,
 					  1, -1, 0,
 					  ${DEFAULT_PLT_STATIC_CHAIN-0}, -1, 0,
-					  0, -1, -1, 0};
+					  -1, 0, -1, -1, 0};
 
 /* Fake input file for stubs.  */
 static lang_input_statement_type *stub_file;
@@ -683,27 +683,32 @@ fi
 # parse_args and list_options functions.
 #
 PARSE_AND_LIST_PROLOGUE=${PARSE_AND_LIST_PROLOGUE}'
-#define OPTION_STUBGROUP_SIZE		321
-#define OPTION_PLT_STATIC_CHAIN		(OPTION_STUBGROUP_SIZE + 1)
-#define OPTION_NO_PLT_STATIC_CHAIN	(OPTION_PLT_STATIC_CHAIN + 1)
-#define OPTION_PLT_THREAD_SAFE		(OPTION_NO_PLT_STATIC_CHAIN + 1)
-#define OPTION_NO_PLT_THREAD_SAFE	(OPTION_PLT_THREAD_SAFE + 1)
-#define OPTION_PLT_ALIGN		(OPTION_NO_PLT_THREAD_SAFE + 1)
-#define OPTION_NO_PLT_ALIGN		(OPTION_PLT_ALIGN + 1)
-#define OPTION_STUBSYMS			(OPTION_NO_PLT_ALIGN + 1)
-#define OPTION_NO_STUBSYMS		(OPTION_STUBSYMS + 1)
-#define OPTION_SAVRES			(OPTION_NO_STUBSYMS + 1)
-#define OPTION_NO_SAVRES		(OPTION_SAVRES + 1)
-#define OPTION_DOTSYMS			(OPTION_NO_SAVRES + 1)
-#define OPTION_NO_DOTSYMS		(OPTION_DOTSYMS + 1)
-#define OPTION_NO_TLS_OPT		(OPTION_NO_DOTSYMS + 1)
-#define OPTION_TLS_GET_ADDR_OPT		(OPTION_NO_TLS_OPT + 1)
-#define OPTION_NO_TLS_GET_ADDR_OPT	(OPTION_TLS_GET_ADDR_OPT + 1)
-#define OPTION_NO_OPD_OPT		(OPTION_NO_TLS_GET_ADDR_OPT + 1)
-#define OPTION_NO_TOC_OPT		(OPTION_NO_OPD_OPT + 1)
-#define OPTION_NO_MULTI_TOC		(OPTION_NO_TOC_OPT + 1)
-#define OPTION_NO_TOC_SORT		(OPTION_NO_MULTI_TOC + 1)
-#define OPTION_NON_OVERLAPPING_OPD	(OPTION_NO_TOC_SORT + 1)
+enum ppc64_opt
+{
+  OPTION_STUBGROUP_SIZE = 321,
+  OPTION_PLT_STATIC_CHAIN,
+  OPTION_NO_PLT_STATIC_CHAIN,
+  OPTION_PLT_THREAD_SAFE,
+  OPTION_NO_PLT_THREAD_SAFE,
+  OPTION_PLT_ALIGN,
+  OPTION_NO_PLT_ALIGN,
+  OPTION_PLT_LOCALENTRY,
+  OPTION_NO_PLT_LOCALENTRY,
+  OPTION_STUBSYMS,
+  OPTION_NO_STUBSYMS,
+  OPTION_SAVRES,
+  OPTION_NO_SAVRES,
+  OPTION_DOTSYMS,
+  OPTION_NO_DOTSYMS,
+  OPTION_NO_TLS_OPT,
+  OPTION_TLS_GET_ADDR_OPT,
+  OPTION_NO_TLS_GET_ADDR_OPT,
+  OPTION_NO_OPD_OPT,
+  OPTION_NO_TOC_OPT,
+  OPTION_NO_MULTI_TOC,
+  OPTION_NO_TOC_SORT,
+  OPTION_NON_OVERLAPPING_OPD
+};
 '
 
 PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
@@ -714,6 +719,8 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
   { "no-plt-thread-safe", no_argument, NULL, OPTION_NO_PLT_THREAD_SAFE },
   { "plt-align", optional_argument, NULL, OPTION_PLT_ALIGN },
   { "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN },
+  { "plt-localentry", optional_argument, NULL, OPTION_PLT_LOCALENTRY },
+  { "no-plt-localentry", no_argument, NULL, OPTION_NO_PLT_LOCALENTRY },
   { "emit-stub-syms", no_argument, NULL, OPTION_STUBSYMS },
   { "no-emit-stub-syms", no_argument, NULL, OPTION_NO_STUBSYMS },
   { "dotsyms", no_argument, NULL, OPTION_DOTSYMS },
@@ -760,6 +767,12 @@ PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'
   --no-plt-align              Dont'\''t align individual PLT call stubs.\n"
 		   ));
   fprintf (file, _("\
+  --plt-localentry            Optimize calls to ELFv2 localentry:0 functions.\n"
+		   ));
+  fprintf (file, _("\
+  --no-plt-localentry         Don'\''t optimize ELFv2 calls.\n"
+		   ));
+  fprintf (file, _("\
   --emit-stub-syms            Label linker stubs with a symbol.\n"
 		   ));
   fprintf (file, _("\
@@ -852,6 +865,14 @@ PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}'
       params.plt_stub_align = 0;
       break;
 
+    case OPTION_PLT_LOCALENTRY:
+      params.plt_localentry0 = 1;
+      break;
+
+    case OPTION_NO_PLT_LOCALENTRY:
+      params.plt_localentry0 = 0;
+      break;
+
     case OPTION_STUBSYMS:
       params.emit_stub_syms = 1;
       break;
diff --git a/ld/testsuite/ld-powerpc/elfv2so.d b/ld/testsuite/ld-powerpc/elfv2so.d
index d6f0b7d..f3962ac 100644
--- a/ld/testsuite/ld-powerpc/elfv2so.d
+++ b/ld/testsuite/ld-powerpc/elfv2so.d
@@ -57,6 +57,7 @@ Disassembly of section \.text:
 .*:	(7c 08 02 a6|a6 02 08 7c) 	mflr    r0
 .*:	(42 9f 00 05|05 00 9f 42) 	bcl     .*
 .*:	(7d 68 02 a6|a6 02 68 7d) 	mflr    r11
+.*:	(18 00 41 f8|f8 41 00 18) 	std     r2,24\(r1\)
 .*:	(e8 4b ff f0|f0 ff 4b e8) 	ld      r2,-16\(r11\)
 .*:	(7c 08 03 a6|a6 03 08 7c) 	mtlr    r0
 .*:	(7d 8b 60 50|50 60 8b 7d) 	subf    r12,r11,r12
@@ -67,7 +68,6 @@ Disassembly of section \.text:
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*:	(e9 6b 00 08|08 00 6b e9) 	ld      r11,8\(r11\)
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
-.*:	(60 00 00 00|00 00 00 60) 	nop
 
 .* <f3@plt>:
 .*:	(4b ff ff c8|c8 ff ff 4b) 	b       .* <__glink_PLTresolve>
diff --git a/ld/testsuite/ld-powerpc/powerpc.exp b/ld/testsuite/ld-powerpc/powerpc.exp
index 5350235..f12e413 100644
--- a/ld/testsuite/ld-powerpc/powerpc.exp
+++ b/ld/testsuite/ld-powerpc/powerpc.exp
@@ -210,7 +210,7 @@ set ppc64elftests {
       "tlsopt4"}
     {"TLS DLL" "-shared -melf64ppc --version-script tlsdll.ver" "" "-a64" {tlsdll.s}
      {} "tlsdll.so"}
-    {"TLS opt 5" "-melf64ppc --gc-sections tmpdir/tlsdll.so" "" "-a64"  {tlsopt5.s}
+    {"TLS opt 5" "-melf64ppc --gc-sections --no-plt-localentry tmpdir/tlsdll.so" "" "-a64"  {tlsopt5.s}
      {{objdump -dr tlsopt5.d}}
       "tlsopt5"}
     {"sym@tocbase" "-shared -melf64ppc" "" "-a64" {symtocbase-1.s symtocbase-2.s}
diff --git a/ld/testsuite/ld-powerpc/tlsopt5.d b/ld/testsuite/ld-powerpc/tlsopt5.d
index 7b17130..b356a2e 100644
--- a/ld/testsuite/ld-powerpc/tlsopt5.d
+++ b/ld/testsuite/ld-powerpc/tlsopt5.d
@@ -1,6 +1,6 @@
 #source: tlsopt5.s
 #as: -a64
-#ld: --gc-sections tlsdll.so
+#ld: --gc-sections --no-plt-localentry tlsdll.so
 #objdump: -dr
 #target: powerpc64*-*-*
 
@@ -38,6 +38,7 @@ Disassembly of section \.text:
 .*:	(a6 02 08 7c|7c 08 02 a6) 	mflr    r0
 .*:	(05 00 9f 42|42 9f 00 05) 	bcl     .*
 .*:	(a6 02 68 7d|7d 68 02 a6) 	mflr    r11
+.*:	(18 00 41 f8|f8 41 00 18) 	std     r2,24\(r1\)
 .*:	(f0 ff 4b e8|e8 4b ff f0) 	ld      r2,-16\(r11\)
 .*:	(a6 03 08 7c|7c 08 03 a6) 	mtlr    r0
 .*:	(50 60 8b 7d|7d 8b 60 50) 	subf    r12,r11,r12
@@ -48,7 +49,6 @@ Disassembly of section \.text:
 .*:	(a6 03 89 7d|7d 89 03 a6) 	mtctr   r12
 .*:	(08 00 6b e9|e9 6b 00 08) 	ld      r11,8\(r11\)
 .*:	(20 04 80 4e|4e 80 04 20) 	bctr
-.*:	(00 00 00 60|60 00 00 00) 	nop
 
 0000000010000390 <__tls_get_addr_opt@plt>:
 .*:	(c8 ff ff 4b|4b ff ff c8) 	b       .*

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]