This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

ppc476 plt call stubs


Some further ppc476 workaround changes.  With the original ppc476
patch, I aligned plt call stubs so that __glink_PLTresolve never
crossed a page boundary, but when we found that bctr at the end of a
page could trigger the icache problem, I forgot that the plt call stubs
might result in a bctr at the end of a page.  Not that we've ever seen
the call stubs cause trouble (and it might be impossible to achieve
the timing necessary to create the problem), but we want to be certain.

1) Replaces most nops with "ba 0".  The write_glink_stub change catches
   the three insn plt call stubs.  The first hunk of this patch
   applying to ppc_elf_finish_dynamic_sections replaces nops towards
   the end of the glink branch table with branches.  The last two
   hunks of the patch are for nops emitted at the end of
   __glink_PLTresolve.
2) Tweaks a plt call stub sitting at the end of a page immediately
   preceding the glink branch table so that no prefetch of the next
   page will occur.
3) Fixes the default page size used by powerpc-lynxos,
   powerpc-netware, and powerpc-windiss.

Here's some example output for a shared library.

00000fe0 <00008000.got2.plt_pic32.__cxa_finalize@@GLIBC_2.1.3>:
     fe0:       3d 7e 00 01     addis   r11,r30,1
     fe4:       81 6b 81 30     lwz     r11,-32464(r11)
     fe8:       7d 69 03 a6     mtctr   r11
     fec:       4e 80 04 20     bctr

00000ff0 <00000000.plt_pic32.__gmon_start__>:
     ff0:       3d 7e 00 01     addis   r11,r30,1
     ff4:       81 6b 00 18     lwz     r11,24(r11)
     ff8:       7d 69 03 a6     mtctr   r11
     ffc:       4b ff ff f0     b       fec		# <<<<<< tweaked

00001000 <00000000.plt_pic32.close@@GLIBC_2.0>:
    1000:       3d 7e 00 01     addis   r11,r30,1
    1004:       81 6b 00 1c     lwz     r11,28(r11)
    1008:       7d 69 03 a6     mtctr   r11
    100c:       4e 80 04 20     bctr

00001010 <__glink>:
    1010:       48 00 00 30     b       1040 <__glink_PLTresolve>
    1014:       48 00 00 2c     b       1040 <__glink_PLTresolve>
[snip]
    103c:	48 00 00 04 	b       1040 <__glink_PLTresolve>

00001040 <__glink_PLTresolve>:
    1040:	3d 6b 00 00 	addis   r11,r11,0
    1044:	7c 08 02 a6 	mflr    r0
    1048:	42 9f 00 05 	bcl     20,4*cr7+so,104c <__glink_PLTresolve+0xc>
    104c:	39 6b 00 3c 	addi    r11,r11,60
    1050:	7d 88 02 a6 	mflr    r12
    1054:	7c 08 03 a6 	mtlr    r0
    1058:	7d 6c 58 50 	subf    r11,r12,r11
    105c:	3d 8c 00 01 	addis   r12,r12,1
    1060:	80 0c 01 a4 	lwz     r0,420(r12)
    1064:	81 8c 01 a8 	lwz     r12,424(r12)
    1068:	7c 09 03 a6 	mtctr   r0
    106c:	7c 0b 5a 14 	add     r0,r11,r11
    1070:	7d 60 5a 14 	add     r11,r0,r11
    1074:	4e 80 04 20 	bctr
    1078:	48 00 00 02 	ba      0 <_init-0x460>
    107c:	48 00 00 02 	ba      0 <_init-0x460>

Also attached are cumulative backports to various binutils versions.

	* elf32-ppc.c (BA): Define.
	(ppc_elf_link_hash_table_create): Correct default_params.
	(write_glink_stub): Pad small plt call stub with "ba 0" rather
	than "nop" for ppc476_workaround.
	(ppc_elf_finish_dynamic_sections): Likewise for branch table
	and __glink_PLTresolve.  Ensure plt call stub at end of page
	doesn't allow fall-thru prefetch.

diff --git a/bfd/elf32-ppc.c b/bfd/elf32-ppc.c
index 61e94a0..8492732 100644
--- a/bfd/elf32-ppc.c
+++ b/bfd/elf32-ppc.c
@@ -147,6 +147,7 @@ static const bfd_vma ppc_elf_vxworks_pic_plt0_entry
 #define ADD_3_12_2	0x7c6c1214
 #define ADD_11_0_11	0x7d605a14
 #define B		0x48000000
+#define BA		0x48000002
 #define BCL_20_31	0x429f0005
 #define BCTR		0x4e800420
 #define BEQLR		0x4d820020
@@ -3249,7 +3250,7 @@ static struct bfd_link_hash_table *
 ppc_elf_link_hash_table_create (bfd *abfd)
 {
   struct ppc_elf_link_hash_table *ret;
-  static struct ppc_elf_params default_params = { PLT_OLD, 0, 1, 0, 0, 4096 };
+  static struct ppc_elf_params default_params = { PLT_OLD, 0, 1, 0, 0, 12 };
 
   ret = bfd_zmalloc (sizeof (struct ppc_elf_link_hash_table));
   if (ret == NULL)
@@ -7336,7 +7337,7 @@ write_glink_stub (struct plt_entry *ent, asection *plt_sec, unsigned char *p,
 	  p += 4;
 	  bfd_put_32 (output_bfd, BCTR, p);
 	  p += 4;
-	  bfd_put_32 (output_bfd, NOP, p);
+	  bfd_put_32 (output_bfd, htab->params->ppc476_workaround ? BA : NOP, p);
 	  p += 4;
 	}
       else
@@ -10115,7 +10116,7 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
       p += htab->glink_pltresolve;
       endp = htab->glink->contents;
       endp += htab->glink->size - GLINK_PLTRESOLVE;
-      while (p < endp - 8 * 4)
+      while (p < endp - (htab->params->ppc476_workaround ? 0 : 8 * 4))
 	{
 	  bfd_put_32 (output_bfd, B + endp - p, p);
 	  p += 4;
@@ -10130,6 +10131,39 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
 	      + htab->glink->output_section->vma
 	      + htab->glink->output_offset);
 
+      if (htab->params->ppc476_workaround)
+	{
+	  /* Ensure that a call stub at the end of a page doesn't
+	     result in prefetch over the end of the page into the
+	     glink branch table.  */
+	  bfd_vma pagesize = (bfd_vma) 1 << htab->params->pagesize_p2;
+	  bfd_vma page_addr;
+	  bfd_vma glink_start = (htab->glink->output_section->vma
+				 + htab->glink->output_offset);
+
+	  for (page_addr = res0 & -pagesize;
+	       page_addr > glink_start;
+	       page_addr -= pagesize)
+	    {
+	      /* We have a plt call stub that may need fixing.  */
+	      bfd_byte *loc;
+	      unsigned int insn;
+
+	      loc = htab->glink->contents + page_addr - 4 - glink_start;
+	      insn = bfd_get_32 (output_bfd, loc);
+	      if (insn == BCTR)
+		{
+		  /* By alignment, we know that there must be at least
+		     one other call stub before this one.  */
+		  insn = bfd_get_32 (output_bfd, loc - 16);
+		  if (insn == BCTR)
+		    bfd_put_32 (output_bfd, B | (-16 & 0x3fffffc), loc);
+		  else
+		    bfd_put_32 (output_bfd, B | (-20 & 0x3fffffc), loc);
+		}
+	    }
+	}
+
       /* Last comes the PLTresolve stub.  */
       if (info->shared)
 	{
@@ -10137,7 +10171,11 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
 
 	  for (i = 0; i < ARRAY_SIZE (pic_plt_resolve); i++)
 	    {
-	      bfd_put_32 (output_bfd, pic_plt_resolve[i], p);
+	      unsigned int insn = pic_plt_resolve[i];
+
+	      if (htab->params->ppc476_workaround && insn == NOP)
+		insn = BA + 0;
+	      bfd_put_32 (output_bfd, insn, p);
 	      p += 4;
 	    }
 	  p -= 4 * ARRAY_SIZE (pic_plt_resolve);
@@ -10171,7 +10209,11 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
 	{
 	  for (i = 0; i < ARRAY_SIZE (plt_resolve); i++)
 	    {
-	      bfd_put_32 (output_bfd, plt_resolve[i], p);
+	      unsigned int insn = plt_resolve[i];
+
+	      if (htab->params->ppc476_workaround && insn == NOP)
+		insn = BA + 0;
+	      bfd_put_32 (output_bfd, insn, p);
 	      p += 4;
 	    }
 	  p -= 4 * ARRAY_SIZE (plt_resolve);

-- 
Alan Modra
Australia Development Lab, IBM

Attachment: backport476w.tar.gz
Description: Binary data


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]