This is the mail archive of the
binutils@sourceware.cygnus.com
mailing list for the binutils project.
[PATCH] bfd sparc64 fixes and tail call optimizations (take 2)
- To: Richard Henderson <rth at cygnus dot com>
- Subject: [PATCH] bfd sparc64 fixes and tail call optimizations (take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- Date: Tue, 23 May 2000 18:06:33 +0200
- Cc: binutils at sourceware dot cygnus dot com, "David S. Miller" <davem at redhat dot com>
- References: <20000518162611.L474@sunsite.ms.mff.cuni.cz> <20000518090417.A9949@cygnus.com>
- Reply-To: Jakub Jelinek <jakub at redhat dot com>
On Thu, May 18, 2000 at 09:04:17AM -0700, Richard Henderson wrote:
> On Thu, May 18, 2000 at 04:26:11PM +0200, Jakub Jelinek wrote:
> > I have no idea how can I safely optimize the last one (perhaps I'll just
> > teach gcc to always emit 2nd one), but for the first two, the call can be
> > replaced by branch always instruction.
>
> You should probably be doing this with the linker relaxation
> hooks rather than directly in the relocation routines. That
> way you have a way to turn the optimization off if you want.
>
Ok, what about this?
I've added -relax and -no-relax options to gas on SPARC, and I have
implemented a minimal *_relax_section in elf32-sparc.c and elf64-sparc.c: it
only notes that the section should be relaxed, and the actual relaxation is
done later in relocate_section.
I thought about doing it in relax_section for a while, but I think it would
be slower, because I'd have to look up a bunch of things, while at the
relocate_section time all the information is already handy (because the
relocation is being done).
Other relaxations can be done in relax_section if needed, but I don't think this
one must be done there. And ld -relax works as expected.
Jakub
2000-03-09 Jakub Jelinek <jakub@redhat.com>
* elf64-sparc.c: Add ATTRIBUTE_UNUSED to unused function parameters.
Remove unused variables.
(sparc64_elf_relocate_section): Change r_symndx type to unsigned long.
(sparc64_elf_merge_private_bfd_data): Fix shared library case from
previous fix, so that shared libs really don't influence the target's
extension mask and memory model.
--- bfd/elf64-sparc.c.jj Thu Mar 9 10:02:10 2000
+++ bfd/elf64-sparc.c Thu Mar 9 12:01:37 2000
@@ -212,7 +212,7 @@ static CONST struct elf_reloc_map sparc_
static reloc_howto_type *
sparc64_elf_reloc_type_lookup (abfd, code)
- bfd *abfd;
+ bfd *abfd ATTRIBUTE_UNUSED;
bfd_reloc_code_real_type code;
{
unsigned int i;
@@ -226,7 +226,7 @@ sparc64_elf_reloc_type_lookup (abfd, cod
static void
sparc64_elf_info_to_howto (abfd, cache_ptr, dst)
- bfd *abfd;
+ bfd *abfd ATTRIBUTE_UNUSED;
arelent *cache_ptr;
Elf64_Internal_Rela *dst;
{
@@ -240,7 +240,7 @@ sparc64_elf_info_to_howto (abfd, cache_p
static long
sparc64_elf_get_reloc_upper_bound (abfd, sec)
- bfd *abfd;
+ bfd *abfd ATTRIBUTE_UNUSED;
asection *sec;
{
return (sec->reloc_count * 2 + 1) * sizeof (arelent *);
@@ -266,7 +266,6 @@ sparc64_elf_slurp_one_reloc_table (abfd,
asymbol **symbols;
boolean dynamic;
{
- struct elf_backend_data * const ebd = get_elf_backend_data (abfd);
PTR allocated = NULL;
bfd_byte *native_relocs;
arelent *relent;
@@ -501,7 +500,6 @@ sparc64_elf_write_relocs (abfd, sec, dat
for (idx = 0; idx < sec->reloc_count; idx++)
{
bfd_vma addr;
- unsigned int i;
++count;
@@ -723,13 +721,13 @@ sparc_elf_notsup_reloc (abfd,
input_section,
output_bfd,
error_message)
- bfd *abfd;
- arelent *reloc_entry;
- asymbol *symbol;
- PTR data;
- asection *input_section;
- bfd *output_bfd;
- char **error_message;
+ bfd *abfd ATTRIBUTE_UNUSED;
+ arelent *reloc_entry ATTRIBUTE_UNUSED;
+ asymbol *symbol ATTRIBUTE_UNUSED;
+ PTR data ATTRIBUTE_UNUSED;
+ asection *input_section ATTRIBUTE_UNUSED;
+ bfd *output_bfd ATTRIBUTE_UNUSED;
+ char **error_message ATTRIBUTE_UNUSED;
{
return bfd_reloc_notsupported;
}
@@ -745,7 +743,7 @@ sparc_elf_wdisp16_reloc (abfd, reloc_ent
PTR data;
asection *input_section;
bfd *output_bfd;
- char **error_message;
+ char **error_message ATTRIBUTE_UNUSED;
{
bfd_vma relocation;
bfd_vma insn;
@@ -783,7 +781,7 @@ sparc_elf_hix22_reloc (abfd,
PTR data;
asection *input_section;
bfd *output_bfd;
- char **error_message;
+ char **error_message ATTRIBUTE_UNUSED;
{
bfd_vma relocation;
bfd_vma insn;
@@ -820,7 +818,7 @@ sparc_elf_lox10_reloc (abfd,
PTR data;
asection *input_section;
bfd *output_bfd;
- char **error_message;
+ char **error_message ATTRIBUTE_UNUSED;
{
bfd_vma relocation;
bfd_vma insn;
@@ -1265,9 +1263,9 @@ sparc64_elf_add_symbol_hook (abfd, info,
struct bfd_link_info *info;
const Elf_Internal_Sym *sym;
const char **namep;
- flagword *flagsp;
- asection **secp;
- bfd_vma *valp;
+ flagword *flagsp ATTRIBUTE_UNUSED;
+ asection **secp ATTRIBUTE_UNUSED;
+ bfd_vma *valp ATTRIBUTE_UNUSED;
{
static char *stt_types[] = { "NOTYPE", "OBJECT", "FUNCTION" };
@@ -1387,7 +1385,7 @@ sparc64_elf_add_symbol_hook (abfd, info,
static boolean
sparc64_elf_output_arch_syms (output_bfd, info, finfo, func)
- bfd *output_bfd;
+ bfd *output_bfd ATTRIBUTE_UNUSED;
struct bfd_link_info *info;
PTR finfo;
boolean (*func) PARAMS ((PTR, const char *,
@@ -1460,7 +1458,7 @@ sparc64_elf_get_symbol_type (elf_sym, ty
static void
sparc64_elf_symbol_processing (abfd, asym)
- bfd *abfd;
+ bfd *abfd ATTRIBUTE_UNUSED;
asymbol *asym;
{
elf_symbol_type *elfsym;
@@ -1898,7 +1896,7 @@ sparc64_elf_relocate_section (output_bfd
{
int r_type;
reloc_howto_type *howto;
- long r_symndx;
+ unsigned long r_symndx;
struct elf_link_hash_entry *h;
Elf_Internal_Sym *sym;
asection *sec;
@@ -2806,8 +2804,8 @@ sparc64_elf_merge_private_bfd_data (ibfd
/* We don't want dynamic objects memory ordering and
architecture to have any role. That's what dynamic linker
should do. */
- old_flags &= ~(EF_SPARCV9_MM | EF_SPARC_SUN_US1 | EF_SPARC_HAL_R1);
- old_flags |= (new_flags
+ new_flags &= ~(EF_SPARCV9_MM | EF_SPARC_SUN_US1 | EF_SPARC_HAL_R1);
+ new_flags |= (old_flags
& (EF_SPARCV9_MM
| EF_SPARC_SUN_US1
| EF_SPARC_HAL_R1));
@@ -2860,7 +2858,7 @@ sparc64_elf_merge_private_bfd_data (ibfd
static const char *
sparc64_elf_print_symbol_all (abfd, filep, symbol)
- bfd *abfd;
+ bfd *abfd ATTRIBUTE_UNUSED;
PTR filep;
asymbol *symbol;
{
2000-05-23 Jakub Jelinek <jakub@redhat.com>
* elf64-sparc.c (sparc64_elf_relax_section): New.
(sparc64_elf_relocate_section): Optimize tail call into branch always
if possible.
* elf32-sparc.c (elf32_sparc_relocate_section): Likewise.
(elf32_sparc_relax_section): New.
--- bfd/elf64-sparc.c.jj Thu Mar 9 12:01:37 2000
+++ bfd/elf64-sparc.c Tue May 23 16:45:44 2000
@@ -65,6 +65,8 @@ static void sparc64_elf_symbol_processin
static boolean sparc64_elf_merge_private_bfd_data
PARAMS ((bfd *, bfd *));
+static boolean sparc64_elf_relax_section
+ PARAMS ((bfd *, asection *, struct bfd_link_info *, boolean *));
static boolean sparc64_elf_relocate_section
PARAMS ((bfd *, struct bfd_link_info *, bfd *, asection *, bfd_byte *,
Elf_Internal_Rela *, Elf_Internal_Sym *, asection **));
@@ -1853,6 +1855,22 @@ sparc64_elf_size_dynamic_sections (outpu
return true;
}
+#define SET_SEC_DO_RELAX(section) do { elf_section_data(section)->tdata = (void *)1; } while (0)
+#define SEC_DO_RELAX(section) (elf_section_data(section)->tdata == (void *)1)
+
+/*ARGSUSED*/
+static boolean
+sparc64_elf_relax_section (abfd, section, link_info, again)
+ bfd *abfd ATTRIBUTE_UNUSED;
+ asection *section ATTRIBUTE_UNUSED;
+ struct bfd_link_info *link_info ATTRIBUTE_UNUSED;
+ boolean *again;
+{
+ *again = false;
+ SET_SEC_DO_RELAX (section);
+ return true;
+}
+
/* Relocate a SPARC64 ELF section. */
static boolean
@@ -2391,6 +2409,8 @@ sparc64_elf_relocate_section (output_bfd
relocation = (splt->output_section->vma
+ splt->output_offset
+ sparc64_elf_plt_entry_offset (h->plt.offset));
+ if (r_type == R_SPARC_WPLT30)
+ goto do_wplt30;
goto do_default;
case R_SPARC_OLO10:
@@ -2466,6 +2486,80 @@ sparc64_elf_relocate_section (output_bfd
}
break;
+ case R_SPARC_WDISP30:
+ do_wplt30:
+ if (SEC_DO_RELAX (input_section)
+ && rel->r_offset + 4 < input_section->_raw_size)
+ {
+ bfd_vma x, y;
+
+ /* If the instruction is a call with either:
+ restore
+ arithmetic instruction with rd == %o7
+ where rs1 != %o7 and rs2 if it is register != %o7
+ then we can optimize if the call destination is near
+ by changing the call into a branch always. */
+ x = bfd_get_32 (input_bfd, contents + rel->r_offset);
+ y = bfd_get_32 (input_bfd, contents + rel->r_offset + 4);
+ if (((x >> 30) & 3) == 1 && ((y >> 30) & 3) == 2)
+ {
+ if ((((y >> 19) & 0x3f) == 0x3d /* restore */
+ || ((y & (0x28 << 19)) == 0 /* arithmetic */
+ && ((y >> 25) & 0x1f) == 15))
+ && ((y >> 14) & 0x1f) != 15
+ && ((y & (1 << 13))
+ || (y & 0x1f) != 15))
+ {
+ bfd_vma reloc;
+
+ reloc = relocation + rel->r_addend - rel->r_offset;
+ reloc -= (input_section->output_section->vma
+ + input_section->output_offset);
+ if (reloc & 3)
+ goto do_default;
+ if ((reloc & ~(bfd_vma)0x7fffff)
+ && ((reloc | 0x7fffff) != MINUS_ONE))
+ goto do_default;
+ reloc >>= 2;
+ if ((reloc & 0x3c0000) == 0
+ || (reloc & 0x3c0000) == 0x3c0000)
+ x = 0x10680000 | (reloc & 0x7ffff); /* ba,pt %xcc */
+ else
+ x = 0x10800000 | (reloc & 0x3fffff); /* ba */
+ bfd_put_32 (input_bfd, x, contents + rel->r_offset);
+ r = bfd_reloc_ok;
+ if (rel->r_offset >= 4 && (y & 0xfff83fff) == 0x9e100000)
+ {
+ bfd_vma z;
+ unsigned int reg;
+
+ z = bfd_get_32 (input_bfd,
+ contents + rel->r_offset - 4);
+ if ((z & 0xc1ffffff) != 0x8013c000)
+ break;
+
+ /* The sequence was
+ or %o7, %g0, %rN
+ call foo
+ or %rN, %g0, %o7
+
+ If call foo was replaced with ba, replace
+ or %rN, %g0, %o7 with nop. */
+
+ reg = (y >> 14) & 0x1f;
+ if (reg != ((z >> 25) & 0x1f)
+ || reg == 0 || reg == 15)
+ break;
+
+ bfd_put_32 (input_bfd, 0x1000000,
+ contents + rel->r_offset + 4);
+ }
+ break;
+ }
+ }
+ }
+ /* FALLTHROUGH */
+
default:
do_default:
r = _bfd_final_link_relocate (howto, input_bfd, input_section,
@@ -2958,6 +3052,8 @@ const struct elf_size_info sparc64_elf_s
sparc64_elf_canonicalize_dynamic_reloc
#define bfd_elf64_bfd_reloc_type_lookup \
sparc64_elf_reloc_type_lookup
+#define bfd_elf64_bfd_relax_section \
+ sparc64_elf_relax_section
#define elf_backend_create_dynamic_sections \
_bfd_elf_create_dynamic_sections
--- bfd/elf32-sparc.c.jj Thu Mar 9 10:02:10 2000
+++ bfd/elf32-sparc.c Tue May 23 17:39:40 2000
@@ -36,6 +36,8 @@ static boolean elf32_sparc_adjust_dynami
PARAMS ((struct bfd_link_info *, struct elf_link_hash_entry *));
static boolean elf32_sparc_size_dynamic_sections
PARAMS ((bfd *, struct bfd_link_info *));
+static boolean elf32_sparc_relax_section
+ PARAMS ((bfd *, asection *, struct bfd_link_info *, boolean *));
static boolean elf32_sparc_relocate_section
PARAMS ((bfd *, struct bfd_link_info *, bfd *, asection *, bfd_byte *,
Elf_Internal_Rela *, Elf_Internal_Sym *, asection **));
@@ -1059,6 +1061,23 @@ elf32_sparc_size_dynamic_sections (outpu
return true;
}
+
+#define SET_SEC_DO_RELAX(section) do { elf_section_data(section)->tdata = (void *)1; } while (0)
+#define SEC_DO_RELAX(section) (elf_section_data(section)->tdata == (void *)1)
+
+/*ARGSUSED*/
+static boolean
+elf32_sparc_relax_section (abfd, section, link_info, again)
+ bfd *abfd ATTRIBUTE_UNUSED;
+ asection *section ATTRIBUTE_UNUSED;
+ struct bfd_link_info *link_info ATTRIBUTE_UNUSED;
+ boolean *again;
+{
+ *again = false;
+ SET_SEC_DO_RELAX (section);
+ return true;
+}
+
/* Relocate a SPARC ELF section. */
static boolean
@@ -1515,6 +1534,7 @@ elf32_sparc_relocate_section (output_bfd
break;
}
+ r = bfd_reloc_continue;
if (r_type == R_SPARC_WDISP16)
{
bfd_vma x;
@@ -1546,7 +1566,80 @@ elf32_sparc_relocate_section (output_bfd
bfd_putl32 (/*input_bfd,*/ x, contents + rel->r_offset);
r = bfd_reloc_ok;
}
- else
+ else if ((r_type == R_SPARC_WDISP30 || r_type == R_SPARC_WPLT30)
+ && SEC_DO_RELAX (input_section)
+ && rel->r_offset + 4 < input_section->_raw_size)
+ {
+ bfd_vma x, y;
+
+ /* If the instruction is a call with either:
+ restore
+ arithmetic instruction with rd == %o7
+ where rs1 != %o7 and rs2 if it is register != %o7
+ then we can optimize if the call destination is near
+ by changing the call into a branch always. */
+ x = bfd_get_32 (input_bfd, contents + rel->r_offset);
+ y = bfd_get_32 (input_bfd, contents + rel->r_offset + 4);
+ if (((x >> 30) & 3) == 1 && ((y >> 30) & 3) == 2)
+ {
+ if ((((y >> 19) & 0x3f) == 0x3d /* restore */
+ || ((y & (0x28 << 19)) == 0 /* arithmetic */
+ && ((y >> 25) & 0x1f) == 15))
+ && ((y >> 14) & 0x1f) != 15
+ && ((y & (1 << 13))
+ || (y & 0x1f) != 15))
+ {
+ bfd_vma reloc;
+
+ reloc = relocation + rel->r_addend - rel->r_offset;
+ reloc -= (input_section->output_section->vma
+ + input_section->output_offset);
+ if ((reloc & 3) == 0
+ && ((reloc & ~(bfd_vma)0x7fffff) == 0
+ || ((reloc | 0x7fffff) == ~(bfd_vma)0)))
+ {
+ reloc >>= 2;
+ if (((reloc & 0x3c0000) == 0
+ || (reloc & 0x3c0000) == 0x3c0000)
+ && (elf_elfheader (output_bfd)->e_flags & EF_SPARC_32PLUS))
+ x = 0x10680000 | (reloc & 0x7ffff); /* ba,pt %xcc */
+ else
+ x = 0x10800000 | (reloc & 0x3fffff); /* ba */
+ bfd_put_32 (input_bfd, x, contents + rel->r_offset);
+ r = bfd_reloc_ok;
+ if (rel->r_offset >= 4 && (y & 0xfff83fff) == 0x9e100000)
+ {
+ bfd_vma z;
+ unsigned int reg;
+
+ z = bfd_get_32 (input_bfd,
+ contents + rel->r_offset - 4);
+ if ((z & 0xc1ffffff) != 0x8013c000)
+ break;
+
+ /* The sequence was
+ or %o7, %g0, %rN
+ call foo
+ or %rN, %g0, %o7
+
+ If call foo was replaced with ba, replace
+ or %rN, %g0, %o7 with nop. */
+
+ reg = (y >> 14) & 0x1f;
+ if (reg != ((z >> 25) & 0x1f)
+ || reg == 0 || reg == 15)
+ break;
+
+ bfd_put_32 (input_bfd, 0x1000000,
+ contents + rel->r_offset + 4);
+ }
+
+ }
+ }
+ }
+ }
+
+ if (r == bfd_reloc_continue)
r = _bfd_final_link_relocate (howto, input_bfd, input_section,
contents, rel->r_offset,
relocation, rel->r_addend);
@@ -1964,6 +2057,7 @@ elf32_sparc_final_write_processing (abfd
#define ELF_MAXPAGESIZE 0x10000
#define bfd_elf32_bfd_reloc_type_lookup elf32_sparc_reloc_type_lookup
+#define bfd_elf32_bfd_relax_section elf32_sparc_relax_section
#define elf_info_to_howto elf32_sparc_info_to_howto
#define elf_backend_create_dynamic_sections \
_bfd_elf_create_dynamic_sections
2000-05-23 Jakub Jelinek <jakub@redhat.com>
* config/tc-sparc.c (sparc_relax): New.
(md_longopts): Add -relax and -no-relax options.
(md_parse_options, md_show_usage): Likewise.
(md_apply_fix3): Optimize tail call into branch always if possible.
--- gas/config/tc-sparc.c.jj Thu May 18 13:13:53 2000
+++ gas/config/tc-sparc.c Tue May 23 16:17:50 2000
@@ -91,6 +91,9 @@ static enum sparc_opcode_arch_val warn_a
has been used in -64. */
static int no_undeclared_regs;
+/* Non-zero if we should try to relax jumps and calls. */
+static int sparc_relax;
+
/* Non-zero if we are generating PIC code. */
int sparc_pic_code;
@@ -415,6 +418,10 @@ struct option md_longopts[] = {
#define OPTION_UNDECLARED_REGS (OPTION_MD_BASE + 13)
{"undeclared-regs", no_argument, NULL, OPTION_UNDECLARED_REGS},
#endif
+#define OPTION_RELAX (OPTION_MD_BASE + 14)
+ {"relax", no_argument, NULL, OPTION_RELAX},
+#define OPTION_NO_RELAX (OPTION_MD_BASE + 15)
+ {"no-relax", no_argument, NULL, OPTION_NO_RELAX},
{NULL, no_argument, NULL, 0}
};
size_t md_longopts_size = sizeof(md_longopts);
@@ -574,6 +581,14 @@ md_parse_option (c, arg)
break;
#endif
+ case OPTION_RELAX:
+ sparc_relax = 1;
+ break;
+
+ case OPTION_NO_RELAX:
+ sparc_relax = 0;
+ break;
+
default:
return 0;
}
@@ -605,7 +620,9 @@ md_show_usage (stream)
specify variant of SPARC architecture\n\
-bump warn when assembler switches architectures\n\
-sparc ignored\n\
---enforce-aligned-data force .long, etc., to be aligned correctly\n"));
+--enforce-aligned-data force .long, etc., to be aligned correctly\n\
+-relax relax jumps and branches (default)\n\
+-no-relax avoid changing any jumps and branches\n"));
#ifdef OBJ_AOUT
fprintf (stream, _("\
-k generate PIC\n"));
@@ -2915,7 +2932,77 @@ md_apply_fix3 (fixP, value, segment)
|| fixP->fx_addsy == NULL
|| symbol_section_p (fixP->fx_addsy))
++val;
+
insn |= val & 0x3fffffff;
+
+ /* See if we have a delay slot */
+ if (sparc_relax && fixP->fx_where + 8 <= fixP->fx_frag->fr_fix)
+ {
+ long delay;
+
+ /* If the instruction is a call with either:
+ restore
+ arithmetic instruction with rd == %o7
+ where rs1 != %o7 and rs2 if it is register != %o7
+ then we can optimize if the call destination is near
+ by changing the call into a branch always. */
+ if (INSN_BIG_ENDIAN)
+ delay = bfd_getb32 ((unsigned char *) buf + 4);
+ else
+ delay = bfd_getl32 ((unsigned char *) buf + 4);
+ if (((insn >> 30) & 3) != 1 || ((delay >> 30) & 3) != 2)
+ break;
+ if (((delay >> 19) & 0x3f) != 0x3d /* restore */
+ && ((delay & (0x28 << 19)) != 0 /* arithmetic */
+ || ((delay >> 25) & 0x1f) != 15))
+ break;
+ if (((delay >> 14) & 0x1f) == 15
+ || ((delay & (1 << 13)) == 0
+ && (delay & 0x1f) == 15))
+ break;
+ if ((val & 0x3fe00000)
+ && (val & 0x3fe00000) != 0x3fe00000)
+ break;
+ if (((val & 0x3c0000) == 0
+ || (val & 0x3c0000) == 0x3c0000)
+ && (sparc_arch_size == 64
+ || current_architecture >= SPARC_OPCODE_ARCH_V9))
+ /* ba,pt %xcc */
+ insn = 0x10680000 | (val & 0x7ffff);
+ else
+ /* ba */
+ insn = 0x10800000 | (val & 0x3fffff);
+ if (fixP->fx_where >= 4
+ && (delay & 0xfff83fff) == 0x9e100000)
+ {
+ long setter;
+ int reg;
+
+ if (INSN_BIG_ENDIAN)
+ setter = bfd_getb32 ((unsigned char *) buf - 4);
+ else
+ setter = bfd_getl32 ((unsigned char *) buf - 4);
+ if ((setter & 0xc1ffffff) != 0x8013c000)
+ break;
+ /* The sequence was
+ or %o7, %g0, %rN
+ call foo
+ or %rN, %g0, %o7
+
+ If call foo was replaced with ba, replace
+ or %rN, %g0, %o7 with nop. */
+ reg = (delay >> 14) & 0x1f;
+
+ if (reg != ((setter >> 25) & 0x1f)
+ || reg == 0 || reg == 15)
+ break;
+
+ if (INSN_BIG_ENDIAN)
+ bfd_putb32 (0x1000000, (unsigned char *) buf + 4);
+ else
+ bfd_putl32 (0x1000000, (unsigned char *) buf + 4);
+ }
+ }
break;
case BFD_RELOC_SPARC_11: