This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

MIPS JAL/JALR to BAL transformation for Linux (o32 ABI)


Hi All,

  We implemented an optimization that transforms JALR to BAL for function
calls inside a shared library, in order to improve performance.
It turned out that BFD_RELOC_MIPS_JALR was designed as a hint to help the JALR
transformation.  However, this relocation is currently emitted only for the
N32 and N64 ABIs.
So, we made a patch to enable BFD_RELOC_MIPS_JALR for mips32, mips32r2,
mips64, and mips64r2 for all ABIs.
To use this optimization, GCC must be run with -mno-explicit-relocs so that
the assembler emits BFD_RELOC_MIPS_JALR for shared libraries (-mshared).

  The JAL to BAL transformation is enabled by the same mechanism in this patch.
Please see the examples below and check whether this patch might break anything.  Thanks a lot!

Ex 1: (Calls inside a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }

# cc1 -quiet call.c -O2 -mabicalls -mshared -G0 -mno-explicit-relocs -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o:     file format elf32-tradbigmips

Disassembly of section .text:

00000000 <t3>:
   0:   3c1c0000        lui     gp,0x0
                        0: R_MIPS_HI16  _gp_disp
   4:   279c0000        addiu   gp,gp,0
                        4: R_MIPS_LO16  _gp_disp
   8:   0399e021        addu    gp,gp,t9
   c:   03e00008        jr      ra
  10:   00001021        move    v0,zero

00000014 <t2>:
  14:   3c1c0000        lui     gp,0x0
                        14: R_MIPS_HI16 _gp_disp
  18:   279c0000        addiu   gp,gp,0
                        18: R_MIPS_LO16 _gp_disp
  1c:   0399e021        addu    gp,gp,t9
  20:   27bdffe0        addiu   sp,sp,-32
  24:   afbf001c        sw      ra,28(sp)
  28:   afbc0010        sw      gp,16(sp)
  2c:   8f990000        lw      t9,0(gp)
                        2c: R_MIPS_CALL16       t3
  30:   0320f809        jalr    t9	<------------------
                        30: R_MIPS_JALR t3
  34:   00000000        nop
  38:   8fbc0010        lw      gp,16(sp)
  3c:   8fbf001c        lw      ra,28(sp)
  40:   244207c0        addiu   v0,v0,1984
  44:   03e00008        jr      ra
  48:   27bd0020        addiu   sp,sp,32

# ld-new -shared call.o -o libcall.so
# objdump -dr libcall.so
libcall.so:     file format elf32-tradbigmips
Disassembly of section .text:
000002d4 <t3>:
 2d4:   3c1c0002        lui     gp,0x2
 2d8:   279c803c        addiu   gp,gp,-32708
 2dc:   0399e021        addu    gp,gp,t9
 2e0:   03e00008        jr      ra
 2e4:   00001021        move    v0,zero

000002e8 <t2>:
 2e8:   3c1c0002        lui     gp,0x2
 2ec:   279c8028        addiu   gp,gp,-32728
 2f0:   0399e021        addu    gp,gp,t9
 2f4:   27bdffe0        addiu   sp,sp,-32
 2f8:   afbf001c        sw      ra,28(sp)
 2fc:   afbc0010        sw      gp,16(sp)
 300:   8f998018        lw      t9,-32744(gp)
 304:   0411fff3        bal     2d4 <t3>	<--------------------
 308:   00000000        nop
 30c:   8fbc0010        lw      gp,16(sp)
 310:   8fbf001c        lw      ra,28(sp)
 314:   244207c0        addiu   v0,v0,1984
 318:   03e00008        jr      ra
 31c:   27bd0020        addiu   sp,sp,32

Ex 2: (Calls not in a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }

# cc1 -quiet call.c -O2 -mabicalls -mno-shared -G0 -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o:     file format elf32-tradbigmips


Disassembly of section .text:

00000000 <t3>:
   0:   03e00008        jr      ra
   4:   00001021        move    v0,zero

00000008 <t2>:
   8:   27bdffe0        addiu   sp,sp,-32
   c:   afbf001c        sw      ra,28(sp)
  10:   0c000000        jal     0 <t3>	<-----------------
                        10: R_MIPS_26   t3
  14:   00000000        nop
  18:   8fbf001c        lw      ra,28(sp)
  1c:   244207c0        addiu   v0,v0,1984
  20:   03e00008        jr      ra
  24:   27bd0020        addiu   sp,sp,32

# ld-new call.o -o call
# objdump -dr call
call:     file format elf32-tradbigmips


Disassembly of section .text:

0040006c <t3>:
  40006c:       03e00008        jr      ra
  400070:       00001021        move    v0,zero

00400074 <t2>:
  400074:       27bdffe0        addiu   sp,sp,-32
  400078:       afbf001c        sw      ra,28(sp)
  40007c:       0411fffb        bal     40006c <t3> 	<-----------------
  400080:       00000000        nop
  400084:       8fbf001c        lw      ra,28(sp)
  400088:       244207c0        addiu   v0,v0,1984
  40008c:       03e00008        jr      ra
  400090:       27bd0020        addiu   sp,sp,32

Regards,
Chao-ying

gas/ChangeLog
2009-07-30  Chao-ying Fu  <fu@mips.com>

	* config/tc-mips.c (MIPS_JALR_HINT_P): New define.  True for mips32,
	mips32r2, mips64, and mips64r2.
	(macro_build_jalr): If MIPS_JALR_HINT_P, emit BFD_RELOC_MIPS_JALR.

bfd/ChangeLog
2009-07-30  Chao-ying Fu  <fu@mips.com>

	* elf32-mips.c (mips_reloc_map): Add BFD_RELOC_MIPS_JALR.
	* elfxx-mips.c (JAL_JALR_TO_BAL_P): New define to transform JAL/JALR
	to BAL for CPUs that include RM9000, mips32, mips32r2, mips64, and mips64r2.
	(mips_elf_perform_relocation): Use JAL_JALR_TO_BAL_P to guard the transformation.


Index: src/gas/config/tc-mips.c
===================================================================
--- src.orig/gas/config/tc-mips.c	2009-07-30 16:31:53.379834000 -0700
+++ src/gas/config/tc-mips.c	2009-07-30 16:54:32.814022000 -0700
@@ -290,6 +290,12 @@ static int file_ase_mips16;
 			      || mips_opts.isa == ISA_MIPS64		\
 			      || mips_opts.isa == ISA_MIPS64R2)
 
+/* True if we want to create BFD_RELOC_MIPS_JALR for jalr $25.  */
+#define MIPS_JALR_HINT_P (mips_opts.isa == ISA_MIPS32		\
+			  || mips_opts.isa == ISA_MIPS32R2	\
+			  || mips_opts.isa == ISA_MIPS64	\
+			  || mips_opts.isa == ISA_MIPS64R2)
+
 /* True if -mips3d was passed or implied by arguments passed on the
    command line (e.g., by -march).  */
 static int file_ase_mips3d;
@@ -3923,12 +3929,11 @@ macro_build_jalr (expressionS *ep)
   char *f = NULL;
 
   if (HAVE_NEWABI)
-    {
-      frag_grow (8);
-      f = frag_more (0);
-    }
+    frag_grow (8);
+  if (HAVE_NEWABI || MIPS_JALR_HINT_P)
+    f = frag_more (0);
   macro_build (NULL, "jalr", "d,s", RA, PIC_CALL_REG);
-  if (HAVE_NEWABI)
+  if (HAVE_NEWABI || MIPS_JALR_HINT_P)
     fix_new_exp (frag_now, f - frag_now->fr_literal,
 		 4, ep, FALSE, BFD_RELOC_MIPS_JALR);
 }
Index: src/bfd/elf32-mips.c
===================================================================
--- src.orig/bfd/elf32-mips.c	2009-07-30 16:31:53.587623000 -0700
+++ src/bfd/elf32-mips.c	2009-07-30 16:40:12.216495000 -0700
@@ -1261,6 +1261,7 @@ static const struct elf_reloc_map mips_r
   { BFD_RELOC_MIPS_GOT_PAGE, R_MIPS_GOT_PAGE },
   { BFD_RELOC_MIPS_GOT_OFST, R_MIPS_GOT_OFST },
   { BFD_RELOC_MIPS_GOT_DISP, R_MIPS_GOT_DISP },
+  { BFD_RELOC_MIPS_JALR, R_MIPS_JALR },
   { BFD_RELOC_MIPS_TLS_DTPMOD32, R_MIPS_TLS_DTPMOD32 },
   { BFD_RELOC_MIPS_TLS_DTPREL32, R_MIPS_TLS_DTPREL32 },
   { BFD_RELOC_MIPS_TLS_DTPMOD64, R_MIPS_TLS_DTPMOD64 },
Index: src/bfd/elfxx-mips.c
===================================================================
--- src.orig/bfd/elfxx-mips.c	2009-07-30 16:31:53.713500000 -0700
+++ src/bfd/elfxx-mips.c	2009-07-30 16:40:12.261451000 -0700
@@ -668,6 +668,16 @@ static bfd *reldyn_sorting_bfd;
   (   ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) != E_MIPS_ARCH_1) \
    || ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_3900))
 
+/* True if ABFD is for CPUs that are faster if jal/jalr is converted to bal.
+   This should be safe for all architectures, but for now we enable it
+   for RM9000, mips32, mips32r2, mips64, and mips64r2.  */
+#define JAL_JALR_TO_BAL_P(abfd) \
+  (   ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_9000) \
+   || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32) \
+   || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32R2) \
+   || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64) \
+   || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64R2))
+
 /* True if ABFD is a PIC object.  */
 #define PIC_OBJECT_P(abfd) \
   ((elf_elfheader (abfd)->e_flags & EF_MIPS_PIC) != 0)
@@ -5590,7 +5600,7 @@ mips_elf_perform_relocation (struct bfd_
      prediction hardware.  If we are linking for the RM9000, and we
      see jal, and bal fits, use it instead.  Note that this
      transformation should be safe for all architectures.  */
-  if (bfd_get_mach (input_bfd) == bfd_mach_mips9000
+  if (JAL_JALR_TO_BAL_P (input_bfd)
       && !info->relocatable
       && !require_jalx
       && ((r_type == R_MIPS_26 && (x >> 26) == 0x3)	    /* jal addr */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]