This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SPU stack sizing


Stack space estimation for SPU, done the hard way: rummaging through
function prologues to find stack-adjusting instructions, and
generating a call graph from relocations on branch instructions.  It
would have been easier if we relied on gcc passing the function stack
sizes via debug info, but this way people don't need to provide
debug info for assembly (e.g. newlib), just so long as they don't get
too creative in their assembly prologues.

bfd/
	* elf32-spu.c (struct spu_link_hash_table): Add stack_analysis
	and emit_stack_syms bitfields.
	(get_sym_h): Read all symbols if stack analysis will be done.
	(spu_elf_create_sections): Add stack_analysis and emit_stack_syms
	params, and stash in hash table.
	(is_hint): Split off from..
	(is_branch): ..here.  Adjust callers.
	(spu_elf_size_stubs): Add stack_analysis param.  Arrange to read
	and keep all syms.
	(write_one_stub): Fix mem leak.
	(find_function_stack_adjust): New function.
	(sort_syms_syms, sort_syms_psecs): New vars.
	(sort_syms): New function.
	(struct call_info, struct function_info): New.
	(struct spu_elf_stack_info): New.
	(alloc_stack_info, maybe_insert_function, func_name): New functions.
	(is_nop, insns_at_end, check_function_ranges): Likewise.
	(find_function, insert_callee, mark_functions_via_relocs): Likewise.
	(pasted_function, interesting_section, discover_functions): Likewise.
	(mark_non_root, call_graph_traverse, build_call_tree): Likewise.
	(sum_stack, spu_elf_stack_analysis, spu_elf_final_link): Likewise.
	(bfd_elf32_bfd_final_link): Define.
	* elf32-spu.h (struct _spu_elf_section_data): Add stack_info field.
	(spu_elf_create_sections, spu_elf_size_stubs): Update prototypes.
include/
	* bfdlink.h (struct bfd_link_info): Add "info" and "minfo".
ld/
	* ldmain.c (link_callbacks): Init info and minfo fields.
	* ldmisc.c (minfo): Do nothing if no map file.
	* emultempl/spuelf.em (stack_analysis, emit_stack_syms): New vars.
	(spu_after_open): Adjust spu_elf_create_sections call.
	(spu_before_allocation): Likewise for spu_elf_size_stubs.
	(OPTION_SPU_STACK_ANALYSIS, OPTION_SPU_STACK_SYMS): Define.
	(PARSE_AND_LIST_LONGOPTS): Add new entries.
	(PARSE_AND_LIST_OPTIONS, PARSE_AND_LIST_ARGS_CASES): Likewise.
	* gen-doc.texi: Add @set for SPU and other missing targets.
	* ld.texinfo: Update man page selection to match gen-doc.texi.
	Document SPU features.

Index: bfd/elf32-spu.c
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.c,v
retrieving revision 1.12
diff -u -p -r1.12 elf32-spu.c
--- bfd/elf32-spu.c	26 Apr 2007 14:46:57 -0000	1.12
+++ bfd/elf32-spu.c	30 Apr 2007 13:17:41 -0000
@@ -271,6 +271,12 @@ struct spu_link_hash_table
 
   /* Set on error.  */
   unsigned int stub_overflow : 1;
+
+  /* Set if stack size analysis should be done.  */
+  unsigned int stack_analysis : 1;
+
+  /* Set if __stack_* syms will be emitted.  */
+  unsigned int emit_stack_syms : 1;
 };
 
 #define spu_hash_table(p) \
@@ -411,9 +417,17 @@ get_sym_h (struct elf_link_hash_entry **
 	{
 	  locsyms = (Elf_Internal_Sym *) symtab_hdr->contents;
 	  if (locsyms == NULL)
-	    locsyms = bfd_elf_get_elf_syms (ibfd, symtab_hdr,
-					    symtab_hdr->sh_info,
-					    0, NULL, NULL, NULL);
+	    {
+	      size_t symcount = symtab_hdr->sh_info;
+
+	      /* If we are reading symbols into the contents, then
+		 read the global syms too.  This is done to cache
+		 syms for later stack analysis.  */
+	      if ((unsigned char **) locsymsp == &symtab_hdr->contents)
+		symcount = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
+	      locsyms = bfd_elf_get_elf_syms (ibfd, symtab_hdr, symcount, 0,
+					      NULL, NULL, NULL);
+	    }
 	  if (locsyms == NULL)
 	    return FALSE;
 	  *locsymsp = locsyms;
@@ -436,6 +450,7 @@ get_sym_h (struct elf_link_hash_entry **
 	  *symsecp = symsec;
 	}
     }
+
   return TRUE;
 }
 
@@ -488,9 +503,17 @@ spu_stub_name (const asection *sym_sec,
    that the linker maps the sections to the right place in the output.  */
 
 bfd_boolean
-spu_elf_create_sections (bfd *output_bfd, struct bfd_link_info *info)
+spu_elf_create_sections (bfd *output_bfd,
+			 struct bfd_link_info *info,
+			 int stack_analysis,
+			 int emit_stack_syms)
 {
   bfd *ibfd;
+  struct spu_link_hash_table *htab = spu_hash_table (info);
+
+  /* Stash some options away where we can get at them later.  */
+  htab->stack_analysis = stack_analysis;
+  htab->emit_stack_syms = emit_stack_syms;
 
   for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->next)
     if (bfd_get_section_by_name (ibfd, SPU_PTNOTE_SPUNAME) != NULL)
@@ -672,7 +695,7 @@ spu_elf_find_overlays (bfd *output_bfd, 
 					/* br __ovly_load */
 #define NOP	0x40200000
 
-/* Return true for all relative and absolute branch and hint instructions.
+/* Return true for all relative and absolute branch instructions.
    bra   00110000 0..
    brasl 00110001 0..
    br    00110010 0..
@@ -680,15 +703,22 @@ spu_elf_find_overlays (bfd *output_bfd, 
    brz   00100000 0..
    brnz  00100001 0..
    brhz  00100010 0..
-   brhnz 00100011 0..
+   brhnz 00100011 0..  */
+
+static bfd_boolean
+is_branch (const unsigned char *insn)
+{
+  return (insn[0] & 0xec) == 0x20 && (insn[1] & 0x80) == 0;
+}
+
+/* Return true for branch hint instructions.
    hbra  0001000..
    hbrr  0001001..  */
 
 static bfd_boolean
-is_branch (const unsigned char *insn)
+is_hint (const unsigned char *insn)
 {
-  return (((insn[0] & 0xec) == 0x20 && (insn[1] & 0x80) == 0)
-	  || (insn[0] & 0xfc) == 0x10);
+  return (insn[0] & 0xfc) == 0x10;
 }
 
 /* Return TRUE if this reloc symbol should possibly go via an overlay stub.  */
@@ -833,6 +863,7 @@ bfd_boolean
 spu_elf_size_stubs (bfd *output_bfd,
 		    struct bfd_link_info *info,
 		    int non_overlay_stubs,
+		    int stack_analysis,
 		    asection **stub,
 		    asection **ovtab,
 		    asection **toe)
@@ -853,6 +884,7 @@ spu_elf_size_stubs (bfd *output_bfd,
       Elf_Internal_Shdr *symtab_hdr;
       asection *section;
       Elf_Internal_Sym *local_syms = NULL;
+      Elf_Internal_Sym **psyms;
 
       if (ibfd->xvec != &bfd_elf32_spu_vec)
 	continue;
@@ -862,6 +894,11 @@ spu_elf_size_stubs (bfd *output_bfd,
       if (symtab_hdr->sh_info == 0)
 	continue;
 
+      /* Arrange to read and keep global syms for later stack analysis.  */
+      psyms = &local_syms;
+      if (stack_analysis)
+	psyms = (Elf_Internal_Sym **) &symtab_hdr->contents;
+
       /* Walk over each section attached to the input bfd.  */
       for (section = ibfd->sections; section != NULL; section = section->next)
 	{
@@ -913,7 +950,7 @@ spu_elf_size_stubs (bfd *output_bfd,
 		}
 
 	      /* Determine the reloc target section.  */
-	      if (!get_sym_h (&h, &sym, &sym_sec, &local_syms, r_indx, ibfd))
+	      if (!get_sym_h (&h, &sym, &sym_sec, psyms, r_indx, ibfd))
 		goto error_ret_free_internal;
 
 	      if (sym_sec == NULL
@@ -937,7 +974,7 @@ spu_elf_size_stubs (bfd *output_bfd,
 						 irela->r_offset, 4))
 		    goto error_ret_free_internal;
 
-		  if (is_branch (insn))
+		  if (is_branch (insn) || is_hint (insn))
 		    {
 		      insn_type = branch;
 		      if ((insn[0] & 0xfd) == 0x31)
@@ -1241,7 +1278,8 @@ write_one_stub (struct bfd_hash_entry *b
 	return FALSE;
       memcpy (name, "00000000.ovl_call.", len1);
       memcpy (name + len1, ent->root.string, len2 + 1);
-      h = elf_link_hash_lookup (&htab->elf, name, TRUE, FALSE, FALSE);
+      h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
+      free (name);
       if (h == NULL)
 	return FALSE;
       if (h->root.type == bfd_link_hash_new)
@@ -1421,6 +1459,1199 @@ spu_elf_build_stubs (struct bfd_link_inf
   return TRUE;
 }
 
+/* OFFSET in SEC (presumably) is the beginning of a function prologue.
+   Search for stack adjusting insns, and return the sp delta.  */
+
+static int
+find_function_stack_adjust (asection *sec, bfd_vma offset)
+{
+  int unrecog;
+  int reg[128];
+
+  memset (reg, 0, sizeof (reg));
+  for (unrecog = 0; offset + 4 <= sec->size && unrecog < 32; offset += 4)
+    {
+      unsigned char buf[4];
+      int rt, ra;
+      int imm;
+
+      /* Assume no relocs on stack adjusting insns.  */
+      if (!bfd_get_section_contents (sec->owner, sec, buf, offset, 4))
+	break;
+
+      if (buf[0] == 0x24 /* stqd */)
+	continue;
+
+      rt = buf[3] & 0x7f;
+      ra = ((buf[2] & 0x3f) << 1) | (buf[3] >> 7);
+      /* Partly decoded immediate field.  */
+      imm = (buf[1] << 9) | (buf[2] << 1) | (buf[3] >> 7);
+
+      if (buf[0] == 0x1c /* ai */)
+	{
+	  imm >>= 7;
+	  imm = (imm ^ 0x200) - 0x200;
+	  reg[rt] = reg[ra] + imm;
+
+	  if (rt == 1 /* sp */)
+	    {
+	      if (imm > 0)
+		break;
+	      return reg[rt];
+	    }
+	}
+      else if (buf[0] == 0x18 && (buf[1] & 0xe0) == 0 /* a */)
+	{
+	  int rb = ((buf[1] & 0x1f) << 2) | ((buf[2] & 0xc0) >> 6);
+
+	  reg[rt] = reg[ra] + reg[rb];
+	  if (rt == 1)
+	    return reg[rt];
+	}
+      else if ((buf[0] & 0xfc) == 0x40 /* il, ilh, ilhu, ila */)
+	{
+	  if (buf[0] >= 0x42 /* ila */)
+	    imm |= (buf[0] & 1) << 17;
+	  else
+	    {
+	      imm &= 0xffff;
+
+	      if (buf[0] == 0x40 /* il */)
+		{
+		  if ((buf[1] & 0x80) == 0)
+		    goto unknown_insn;
+		  imm = (imm ^ 0x8000) - 0x8000;
+		}
+	      else if ((buf[1] & 0x80) == 0 /* ilhu */)
+		imm <<= 16;
+	    }
+	  reg[rt] = imm;
+	  continue;
+	}
+      else if (buf[0] == 0x60 && (buf[1] & 0x80) != 0 /* iohl */)
+	{
+	  reg[rt] |= imm & 0xffff;
+	  continue;
+	}
+      else if (buf[0] == 0x04 /* ori */)
+	{
+	  imm >>= 7;
+	  imm = (imm ^ 0x200) - 0x200;
+	  reg[rt] = reg[ra] | imm;
+	  continue;
+	}
+      else if ((buf[0] == 0x33 && imm == 1 /* brsl .+4 */)
+	       || (buf[0] == 0x08 && (buf[1] & 0xe0) == 0 /* sf */))
+	{
+	  /* Used in pic reg load.  Say rt is trashed.  */
+	  reg[rt] = 0;
+	  continue;
+	}
+      else if (is_branch (buf))
+	/* If we hit a branch then we must be out of the prologue.  */
+	break;
+    unknown_insn:
+      ++unrecog;
+    }
+
+  return 0;
+}
+
+/* qsort predicate to sort symbols by section and value.  */
+
+static Elf_Internal_Sym *sort_syms_syms;
+static asection **sort_syms_psecs;
+
+static int
+sort_syms (const void *a, const void *b)
+{
+  Elf_Internal_Sym *const *s1 = a;
+  Elf_Internal_Sym *const *s2 = b;
+  asection *sec1,*sec2;
+  bfd_signed_vma delta;
+
+  sec1 = sort_syms_psecs[*s1 - sort_syms_syms];
+  sec2 = sort_syms_psecs[*s2 - sort_syms_syms];
+
+  if (sec1 != sec2)
+    return sec1->index - sec2->index;
+
+  delta = (*s1)->st_value - (*s2)->st_value;
+  if (delta != 0)
+    return delta < 0 ? -1 : 1;
+
+  delta = (*s2)->st_size - (*s1)->st_size;
+  if (delta != 0)
+    return delta < 0 ? -1 : 1;
+
+  return *s1 < *s2 ? -1 : 1;
+}
+
+struct call_info
+{
+  struct function_info *fun;
+  struct call_info *next;
+  int is_tail;
+};
+
+struct function_info
+{
+  /* List of functions called.  Also branches to hot/cold part of
+     function.  */
+  struct call_info *call_list;
+  /* For hot/cold part of function, point to owner.  */
+  struct function_info *start;
+  /* Symbol at start of function.  */
+  union {
+    Elf_Internal_Sym *sym;
+    struct elf_link_hash_entry *h;
+  } u;
+  /* Function section.  */
+  asection *sec;
+  /* Address range of (this part of) function.  */
+  bfd_vma lo, hi;
+  /* Stack usage.  */
+  int stack;
+  /* Set if global symbol.  */
+  unsigned int global : 1;
+  /* Set if known to be start of function (as distinct from a hunk
+     in hot/cold section).  */
+  unsigned int is_func : 1;
+  /* Flags used during call tree traversal.  */
+  unsigned int visit1 : 1;
+  unsigned int non_root : 1;
+  unsigned int visit2 : 1;
+  unsigned int marking : 1;
+  unsigned int visit3 : 1;
+};
+
+struct spu_elf_stack_info
+{
+  int num_fun;
+  int max_fun;
+  /* Variable size array describing functions, one per contiguous
+     address range belonging to a function.  */
+  struct function_info fun[1];
+};
+
+/* Allocate a struct spu_elf_stack_info with MAX_FUN struct function_info
+   entries for section SEC.  */
+
+static struct spu_elf_stack_info *
+alloc_stack_info (asection *sec, int max_fun)
+{
+  struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
+  bfd_size_type amt;
+
+  amt = sizeof (struct spu_elf_stack_info);
+  amt += (max_fun - 1) * sizeof (struct function_info);
+  sec_data->stack_info = bfd_zmalloc (amt);
+  if (sec_data->stack_info != NULL)
+    sec_data->stack_info->max_fun = max_fun;
+  return sec_data->stack_info;
+}
+
+/* Add a new struct function_info describing a (part of a) function
+   starting at SYM_H, a hash entry if GLOBAL else an Elf_Internal_Sym.
+   Keep the array sorted by address.  Return NULL if out of memory.  */
+
+static struct function_info *
+maybe_insert_function (asection *sec,
+		       void *sym_h,
+		       bfd_boolean global,
+		       bfd_boolean is_func)
+{
+  struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
+  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  int i;
+  bfd_vma off, size;
+
+  if (sinfo == NULL)
+    {
+      sinfo = alloc_stack_info (sec, 20);
+      if (sinfo == NULL)
+	return NULL;
+    }
+
+  if (!global)
+    {
+      Elf_Internal_Sym *sym = sym_h;
+      off = sym->st_value;
+      size = sym->st_size;
+    }
+  else
+    {
+      struct elf_link_hash_entry *h = sym_h;
+      off = h->root.u.def.value;
+      size = h->size;
+    }
+
+  for (i = sinfo->num_fun; --i >= 0; )
+    if (sinfo->fun[i].lo <= off)
+      break;
+  if (i >= 0)
+    {
+      /* Don't add another entry for an alias, but do update some info.  */
+      if (sinfo->fun[i].lo == off)
+	{
+	  /* Prefer globals over local syms.  */
+	  if (global && !sinfo->fun[i].global)
+	    {
+	      sinfo->fun[i].global = TRUE;
+	      sinfo->fun[i].u.h = sym_h;
+	    }
+	  if (is_func)
+	    sinfo->fun[i].is_func = TRUE;
+	  return &sinfo->fun[i];
+	}
+      /* Ignore a zero-size symbol inside an existing function.  */
+      else if (sinfo->fun[i].hi > off && size == 0)
+	return &sinfo->fun[i];
+    }
+
+  /* Grow a full array first, so the shift below cannot overrun it.  */
+  if (sinfo->num_fun >= sinfo->max_fun)
+    {
+      int new_max = sinfo->max_fun + 20 + (sinfo->max_fun >> 1);
+      bfd_size_type old = sizeof (struct spu_elf_stack_info), amt = old;
+
+      old += (sinfo->max_fun - 1) * sizeof (struct function_info);
+      amt += (new_max - 1) * sizeof (struct function_info);
+      sinfo = bfd_realloc (sinfo, amt);
+      if (sinfo == NULL)
+	return NULL;
+      memset ((char *) sinfo + old, 0, amt - old);
+      sinfo->max_fun = new_max;
+      sec_data->stack_info = sinfo;
+    }
+  if (++i < sinfo->num_fun)
+    memmove (&sinfo->fun[i + 1], &sinfo->fun[i],
+	     (sinfo->num_fun - i) * sizeof (sinfo->fun[i]));
+  sinfo->fun[i].is_func = is_func;
+  sinfo->fun[i].global = global;
+  sinfo->fun[i].sec = sec;
+  if (global)
+    sinfo->fun[i].u.h = sym_h;
+  else
+    sinfo->fun[i].u.sym = sym_h;
+  sinfo->fun[i].lo = off;
+  sinfo->fun[i].hi = off + size;
+  sinfo->fun[i].stack = -find_function_stack_adjust (sec, off);
+  sinfo->num_fun += 1;
+  return &sinfo->fun[i];
+}
+
+/* Return the name of FUN.  */
+
+static const char *
+func_name (struct function_info *fun)
+{
+  asection *sec;
+  bfd *ibfd;
+  Elf_Internal_Shdr *symtab_hdr;
+
+  while (fun->start != NULL)
+    fun = fun->start;
+
+  if (fun->global)
+    return fun->u.h->root.root.string;
+
+  sec = fun->sec;
+  if (fun->u.sym->st_name == 0)
+    {
+      size_t len = strlen (sec->name);
+      char *name = bfd_malloc (len + 10);
+      if (name == NULL)
+	return "(null)";
+      sprintf (name, "%s+%lx", sec->name,
+	       (unsigned long) fun->u.sym->st_value & 0xffffffff);
+      return name;
+    }
+  ibfd = sec->owner;
+  symtab_hdr = &elf_tdata (ibfd)->symtab_hdr;
+  return bfd_elf_sym_name (ibfd, symtab_hdr, fun->u.sym, sec);
+}
+
+/* Read the instruction at OFF in SEC.  Return true iff the instruction
+   is a nop, lnop, or stop 0 (all zero insn).  */
+
+static bfd_boolean
+is_nop (asection *sec, bfd_vma off)
+{
+  unsigned char insn[4];
+
+  if (off + 4 > sec->size
+      || !bfd_get_section_contents (sec->owner, sec, insn, off, 4))
+    return FALSE;
+  if ((insn[0] & 0xbf) == 0 && (insn[1] & 0xe0) == 0x20)
+    return TRUE;
+  if (insn[0] == 0 && insn[1] == 0 && insn[2] == 0 && insn[3] == 0)
+    return TRUE;
+  return FALSE;
+}
+
+/* Extend the range of FUN to cover nop padding up to LIMIT.
+   Return TRUE iff some instruction other than a NOP was found.  */
+
+static bfd_boolean
+insns_at_end (struct function_info *fun, bfd_vma limit)
+{
+  bfd_vma off = (fun->hi + 3) & -4;
+
+  while (off < limit && is_nop (fun->sec, off))
+    off += 4;
+  if (off < limit)
+    {
+      fun->hi = off;
+      return TRUE;
+    }
+  fun->hi = limit;
+  return FALSE;
+}
+
+/* Check and fix overlapping function ranges.  Return TRUE iff there
+   are gaps in the current info we have about functions in SEC.  */
+
+static bfd_boolean
+check_function_ranges (asection *sec, struct bfd_link_info *info)
+{
+  struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
+  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  int i;
+  bfd_boolean gaps = FALSE;
+
+  if (sinfo == NULL)
+    return FALSE;
+
+  for (i = 1; i < sinfo->num_fun; i++)
+    if (sinfo->fun[i - 1].hi > sinfo->fun[i].lo)
+      {
+	/* Fix overlapping symbols.  */
+	const char *f1 = func_name (&sinfo->fun[i - 1]);
+	const char *f2 = func_name (&sinfo->fun[i]);
+
+	info->callbacks->einfo (_("warning: %s overlaps %s\n"), f1, f2);
+	sinfo->fun[i - 1].hi = sinfo->fun[i].lo;
+      }
+    else if (insns_at_end (&sinfo->fun[i - 1], sinfo->fun[i].lo))
+      gaps = TRUE;
+
+  if (sinfo->num_fun == 0)
+    gaps = TRUE;
+  else
+    {
+      if (sinfo->fun[0].lo != 0)
+	gaps = TRUE;
+      if (sinfo->fun[sinfo->num_fun - 1].hi > sec->size)
+	{
+	  const char *f1 = func_name (&sinfo->fun[sinfo->num_fun - 1]);
+
+	  info->callbacks->einfo (_("warning: %s exceeds section size\n"), f1);
+	  sinfo->fun[sinfo->num_fun - 1].hi = sec->size;
+	}
+      else if (insns_at_end (&sinfo->fun[sinfo->num_fun - 1], sec->size))
+	gaps = TRUE;
+    }
+  return gaps;
+}
+
+/* Return the function_info whose range contains OFFSET in section SEC,
+   or report an error via INFO's einfo callback and return NULL.  */
+
+static struct function_info *
+find_function (asection *sec, bfd_vma offset, struct bfd_link_info *info)
+{
+  struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
+  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  int lo = 0, hi, mid;
+
+  /* SEC may have no function table at all; treat that as empty.  */
+  hi = sinfo != NULL ? sinfo->num_fun : 0;
+  while (lo < hi)
+    {
+      mid = (lo + hi) / 2;
+      if (offset < sinfo->fun[mid].lo)
+	hi = mid;
+      else if (offset >= sinfo->fun[mid].hi)
+	lo = mid + 1;
+      else
+	return &sinfo->fun[mid];
+    }
+  info->callbacks->einfo (_("%A:0x%v not found in function table\n"),
+			  sec, offset);
+  return NULL;
+}
+
+/* Add CALLEE to CALLER call list if not already present.  */
+
+static bfd_boolean
+insert_callee (struct function_info *caller, struct call_info *callee)
+{
+  struct call_info *p;
+  for (p = caller->call_list; p != NULL; p = p->next)
+    if (p->fun == callee->fun)
+      {
+	/* Tail calls use less stack than normal calls.  Retain entry
+	   for normal call over one for tail call.  */
+	if (p->is_tail > callee->is_tail)
+	  p->is_tail = callee->is_tail;
+	return FALSE;
+      }
+  callee->next = caller->call_list;
+  caller->call_list = callee;
+  return TRUE;
+}
+
+/* Rummage through the relocs for SEC, looking for function calls.
+   If CALL_TREE is true, fill in call graph.  If CALL_TREE is false,
+   mark destination symbols on calls as being functions.  Also
+   look at branches, which may be tail calls or go to hot/cold
+   section part of same function.  */
+
+static bfd_boolean
+mark_functions_via_relocs (asection *sec,
+			   struct bfd_link_info *info,
+			   int call_tree)
+{
+  Elf_Internal_Rela *internal_relocs, *irelaend, *irela;
+  Elf_Internal_Shdr *symtab_hdr = &elf_tdata (sec->owner)->symtab_hdr;
+  Elf_Internal_Sym *syms, **psyms;
+  static bfd_boolean warned;
+
+  internal_relocs = _bfd_elf_link_read_relocs (sec->owner, sec, NULL, NULL,
+					       info->keep_memory);
+  if (internal_relocs == NULL)
+    return FALSE;
+
+  symtab_hdr = &elf_tdata (sec->owner)->symtab_hdr;
+  psyms = (Elf_Internal_Sym **) &symtab_hdr->contents;
+  syms = *psyms;
+  irela = internal_relocs;
+  irelaend = irela + sec->reloc_count;
+  for (; irela < irelaend; irela++)
+    {
+      enum elf_spu_reloc_type r_type;
+      unsigned int r_indx;
+      asection *sym_sec;
+      Elf_Internal_Sym *sym;
+      struct elf_link_hash_entry *h;
+      bfd_vma val;
+      unsigned char insn[4];
+      bfd_boolean is_call;
+      struct function_info *caller;
+      struct call_info *callee;
+
+      r_type = ELF32_R_TYPE (irela->r_info);
+      if (r_type != R_SPU_REL16
+	  && r_type != R_SPU_ADDR16)
+	continue;
+
+      r_indx = ELF32_R_SYM (irela->r_info);
+      if (!get_sym_h (&h, &sym, &sym_sec, psyms, r_indx, sec->owner))
+	return FALSE;
+
+      if (sym_sec == NULL
+	  || sym_sec->output_section == NULL
+	  || sym_sec->output_section->owner != sec->output_section->owner)
+	continue;
+
+      if (!bfd_get_section_contents (sec->owner, sec, insn,
+				     irela->r_offset, 4))
+	return FALSE;
+      if (!is_branch (insn))
+	continue;
+
+      if ((sym_sec->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	  != (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	{
+	  if (!call_tree)
+	    warned = TRUE;
+	  if (!call_tree || !warned)
+	    info->callbacks->einfo (_("%B(%A+0x%v): call to non-code section"
+				      " %B(%A), stack analysis incomplete\n"),
+				    sec->owner, sec, irela->r_offset,
+				    sym_sec->owner, sym_sec);
+	  continue;
+	}
+
+      is_call = (insn[0] & 0xfd) == 0x31;
+
+      if (h)
+	val = h->root.u.def.value;
+      else
+	val = sym->st_value;
+      val += irela->r_addend;
+
+      if (!call_tree)
+	{
+	  struct function_info *fun;
+
+	  if (irela->r_addend != 0)
+	    {
+	      Elf_Internal_Sym *fake = bfd_zmalloc (sizeof (*fake));
+	      if (fake == NULL)
+		return FALSE;
+	      fake->st_value = val;
+	      fake->st_shndx
+		= _bfd_elf_section_from_bfd_section (sym_sec->owner, sym_sec);
+	      sym = fake;
+	    }
+	  if (sym)
+	    fun = maybe_insert_function (sym_sec, sym, FALSE, is_call);
+	  else
+	    fun = maybe_insert_function (sym_sec, h, TRUE, is_call);
+	  if (fun == NULL)
+	    return FALSE;
+	  if (irela->r_addend != 0
+	      && fun->u.sym != sym)
+	    free (sym);
+	  continue;
+	}
+
+      caller = find_function (sec, irela->r_offset, info);
+      if (caller == NULL)
+	return FALSE;
+      callee = bfd_malloc (sizeof *callee);
+      if (callee == NULL)
+	return FALSE;
+
+      callee->fun = find_function (sym_sec, val, info);
+      if (callee->fun == NULL)
+	return FALSE;
+      callee->is_tail = !is_call;
+      if (!insert_callee (caller, callee))
+	free (callee);
+      else if (!is_call
+	       && !callee->fun->is_func
+	       && callee->fun->stack == 0)
+	{
+	  /* This is either a tail call or a branch from one part of
+	     the function to another, ie. hot/cold section.  If the
+	     destination has been called by some other function then
+	     it is a separate function.  We also assume that functions
+	     are not split across input files.  */
+	  if (callee->fun->start != NULL
+	      || sec->owner != sym_sec->owner)
+	    {
+	      callee->fun->start = NULL;
+	      callee->fun->is_func = TRUE;
+	    }
+	  else
+	    callee->fun->start = caller;
+	}
+    }
+
+  return TRUE;
+}
+
+/* Handle something like .init or .fini, which has a piece of a function
+   pasted together from several sections.  Return FALSE on error.  */
+
+static bfd_boolean
+pasted_function (asection *sec, struct bfd_link_info *info)
+{
+  struct bfd_link_order *l;
+  struct _spu_elf_section_data *sec_data;
+  struct spu_elf_stack_info *sinfo;
+  Elf_Internal_Sym *fake;
+  struct function_info *fun, *fun_start = NULL;
+
+  fake = bfd_zmalloc (sizeof (*fake));
+  if (fake == NULL)
+    return FALSE;
+  fake->st_size = sec->size;
+  fake->st_shndx = _bfd_elf_section_from_bfd_section (sec->owner, sec);
+  fun = maybe_insert_function (sec, fake, FALSE, FALSE);
+  if (fun == NULL)
+    {
+      free (fake);
+      return FALSE;
+    }
+
+  /* Find a function immediately preceding this section.  */
+  for (l = sec->output_section->map_head.link_order; l != NULL; l = l->next)
+    {
+      if (l->type == bfd_indirect_link_order && l->u.indirect.section == sec)
+	{
+	  if (fun_start != NULL)
+	    {
+	      if (fun_start->start)
+		fun_start = fun_start->start;
+	      fun->start = fun_start;
+	    }
+	  return TRUE;
+	}
+      if (l->type == bfd_indirect_link_order
+	  && (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL
+	  && (sinfo = sec_data->stack_info) != NULL
+	  && sinfo->num_fun != 0)
+	fun_start = &sinfo->fun[sinfo->num_fun - 1];
+    }
+
+  info->callbacks->einfo (_("%A link_order not found\n"), sec);
+  return FALSE;
+}
+
+/* We're only interested in code sections.  */
+
+static bfd_boolean
+interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab)
+{
+  return (s != htab->stub
+	  && s->output_section != NULL
+	  && s->output_section->owner == obfd
+	  && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	      == (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	  && s->size != 0);
+}
+
+/* Map address ranges in code sections to functions.  */
+
+static bfd_boolean
+discover_functions (bfd *output_bfd, struct bfd_link_info *info)
+{
+  struct spu_link_hash_table *htab = spu_hash_table (info);
+  bfd *ibfd;
+  int bfd_idx;
+  Elf_Internal_Sym ***psym_arr;
+  asection ***sec_arr;
+  bfd_boolean gaps = FALSE;
+
+  bfd_idx = 0;
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    bfd_idx++;
+
+  psym_arr = bfd_zmalloc (bfd_idx * sizeof (*psym_arr));
+  if (psym_arr == NULL)
+    return FALSE;
+  sec_arr = bfd_zmalloc (bfd_idx * sizeof (*sec_arr));
+  if (sec_arr == NULL)
+    return FALSE;
+
+  
+  for (ibfd = info->input_bfds, bfd_idx = 0;
+       ibfd != NULL;
+       ibfd = ibfd->link_next, bfd_idx++)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      Elf_Internal_Shdr *symtab_hdr;
+      asection *sec;
+      size_t symcount;
+      Elf_Internal_Sym *syms, *sy, **psyms, **psy;
+      asection **psecs, **p;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      /* Read all the symbols.  */
+      symtab_hdr = &elf_tdata (ibfd)->symtab_hdr;
+      symcount = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
+      if (symcount == 0)
+	continue;
+
+      syms = (Elf_Internal_Sym *) symtab_hdr->contents;
+      if (syms == NULL)
+	{
+	  syms = bfd_elf_get_elf_syms (ibfd, symtab_hdr, symcount, 0,
+				       NULL, NULL, NULL);
+	  symtab_hdr->contents = (void *) syms;
+	  if (syms == NULL)
+	    return FALSE;
+	}
+
+      /* Select defined function symbols that are going to be output.  */
+      psyms = bfd_malloc ((symcount + 1) * sizeof (*psyms));
+      if (psyms == NULL)
+	return FALSE;
+      psym_arr[bfd_idx] = psyms;
+      psecs = bfd_malloc (symcount * sizeof (*psecs));
+      if (psecs == NULL)
+	return FALSE;
+      sec_arr[bfd_idx] = psecs;
+      for (psy = psyms, p = psecs, sy = syms; sy < syms + symcount; ++p, ++sy)
+	if (ELF_ST_TYPE (sy->st_info) == STT_NOTYPE
+	    || ELF_ST_TYPE (sy->st_info) == STT_FUNC)
+	  {
+	    asection *s;
+
+	    *p = s = bfd_section_from_elf_index (ibfd, sy->st_shndx);
+	    if (s != NULL && interesting_section (s, output_bfd, htab))
+	      *psy++ = sy;
+	  }
+      symcount = psy - psyms;
+      *psy = NULL;
+
+      /* Sort them by section and offset within section.  */
+      sort_syms_syms = syms;
+      sort_syms_psecs = psecs;
+      qsort (psyms, symcount, sizeof (*psyms), sort_syms);
+
+      /* Now inspect the function symbols.  */
+      for (psy = psyms; psy < psyms + symcount; )
+	{
+	  asection *s = psecs[*psy - syms];
+	  Elf_Internal_Sym **psy2;
+
+	  for (psy2 = psy; ++psy2 < psyms + symcount; )
+	    if (psecs[*psy2 - syms] != s)
+	      break;
+
+	  if (!alloc_stack_info (s, psy2 - psy))
+	    return FALSE;
+	  psy = psy2;
+	}
+
+      /* First install info about properly typed and sized functions.
+	 In an ideal world this will cover all code sections, except
+	 when partitioning functions into hot and cold sections,
+	 and the horrible pasted together .init and .fini functions.  */
+      for (psy = psyms; psy < psyms + symcount; ++psy)
+	{
+	  sy = *psy;
+	  if (ELF_ST_TYPE (sy->st_info) == STT_FUNC)
+	    {
+	      asection *s = psecs[sy - syms];
+	      if (!maybe_insert_function (s, sy, FALSE, TRUE))
+		return FALSE;
+	    }
+	}
+
+      for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
+	if (interesting_section (sec, output_bfd, htab))
+	  gaps |= check_function_ranges (sec, info);
+    }
+
+  if (gaps)
+    {
+      /* See if we can discover more function symbols by looking at
+	 relocations.  */
+      for (ibfd = info->input_bfds, bfd_idx = 0;
+	   ibfd != NULL;
+	   ibfd = ibfd->link_next, bfd_idx++)
+	{
+	  asection *sec;
+
+	  if (psym_arr[bfd_idx] == NULL)
+	    continue;
+
+	  for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	    if (interesting_section (sec, output_bfd, htab)
+		&& sec->reloc_count != 0)
+	      {
+		if (!mark_functions_via_relocs (sec, info, FALSE))
+		  return FALSE;
+	      }
+	}
+
+      for (ibfd = info->input_bfds, bfd_idx = 0;
+	   ibfd != NULL;
+	   ibfd = ibfd->link_next, bfd_idx++)
+	{
+	  Elf_Internal_Shdr *symtab_hdr;
+	  asection *sec;
+	  Elf_Internal_Sym *syms, *sy, **psyms, **psy;
+	  asection **psecs;
+
+	  if ((psyms = psym_arr[bfd_idx]) == NULL)
+	    continue;
+
+	  psecs = sec_arr[bfd_idx];
+
+	  symtab_hdr = &elf_tdata (ibfd)->symtab_hdr;
+	  syms = (Elf_Internal_Sym *) symtab_hdr->contents;
+
+	  gaps = FALSE;
+	  for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
+	    if (interesting_section (sec, output_bfd, htab))
+	      gaps |= check_function_ranges (sec, info);
+	  if (!gaps)
+	    continue;
+
+	  /* Finally, install all globals.  */
+	  for (psy = psyms; (sy = *psy) != NULL; ++psy)
+	    {
+	      asection *s;
+
+	      s = psecs[sy - syms];
+
+	      /* Global syms might be improperly typed functions.  */
+	      if (ELF_ST_TYPE (sy->st_info) != STT_FUNC
+		  && ELF_ST_BIND (sy->st_info) == STB_GLOBAL)
+		{
+		  if (!maybe_insert_function (s, sy, FALSE, FALSE))
+		    return FALSE;
+		}
+	    }
+
+	  /* Some of the symbols we've installed as marking the
+	     beginning of functions may have a size of zero.  Extend
+	     the range of such functions to the beginning of the
+	     next symbol of interest.  */
+	  for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	    if (interesting_section (sec, output_bfd, htab))
+	      {
+		struct _spu_elf_section_data *sec_data;
+		struct spu_elf_stack_info *sinfo;
+
+		sec_data = spu_elf_section_data (sec);
+		sinfo = sec_data->stack_info;
+		if (sinfo != NULL)
+		  {
+		    int fun_idx;
+		    bfd_vma hi = sec->size;
+
+		    for (fun_idx = sinfo->num_fun; --fun_idx >= 0; )
+		      {
+			sinfo->fun[fun_idx].hi = hi;
+			hi = sinfo->fun[fun_idx].lo;
+		      }
+		  }
+		/* No symbols in this section.  Must be .init or .fini
+		   or something similar.  */
+		else if (!pasted_function (sec, info))
+		  return FALSE;
+	      }
+	}
+    }
+
+  for (ibfd = info->input_bfds, bfd_idx = 0;
+       ibfd != NULL;
+       ibfd = ibfd->link_next, bfd_idx++)
+    {
+      if (psym_arr[bfd_idx] == NULL)
+	continue;
+
+      free (psym_arr[bfd_idx]);
+      free (sec_arr[bfd_idx]);
+    }
+
+  free (psym_arr);
+  free (sec_arr);
+
+  return TRUE;
+}
+
+/* Mark nodes in the call graph that are called by some other node.  */
+
+static void
+mark_non_root (struct function_info *fun)
+{
+  struct call_info *call;
+
+  fun->visit1 = TRUE;
+  for (call = fun->call_list; call; call = call->next)
+    {
+      call->fun->non_root = TRUE;
+      if (!call->fun->visit1)
+	mark_non_root (call->fun);
+    }
+}
+
+/* Remove cycles from the call graph.  */
+
+static void
+call_graph_traverse (struct function_info *fun, struct bfd_link_info *info)
+{
+  struct call_info **callp, *call;
+
+  fun->visit2 = TRUE;
+  fun->marking = TRUE;
+
+  callp = &fun->call_list;
+  while ((call = *callp) != NULL)
+    {
+      if (!call->fun->visit2)
+	call_graph_traverse (call->fun, info);
+      else if (call->fun->marking)
+	{
+	  const char *f1 = func_name (fun);
+	  const char *f2 = func_name (call->fun);
+
+	  info->callbacks->info (_("Stack analysis will ignore the call "
+				   "from %s to %s\n"),
+				 f1, f2);
+	  *callp = call->next;
+	  continue;
+	}
+      callp = &call->next;
+    }
+  fun->marking = FALSE;
+}
+
+/* Populate call_list for each function.  */
+
+static bfd_boolean
+build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
+{
+  struct spu_link_hash_table *htab = spu_hash_table (info);
+  bfd *ibfd;
+
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	{
+	  if (!interesting_section (sec, output_bfd, htab)
+	      || sec->reloc_count == 0)
+	    continue;
+
+	  if (!mark_functions_via_relocs (sec, info, TRUE))
+	    return FALSE;
+	}
+
+      /* Transfer call info from hot/cold section part of function
+	 to main entry.  */
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	{
+	  struct _spu_elf_section_data *sec_data;
+	  struct spu_elf_stack_info *sinfo;
+
+	  if ((sec_data = spu_elf_section_data (sec)) != NULL
+	      && (sinfo = sec_data->stack_info) != NULL)
+	    {
+	      int i;
+	      for (i = 0; i < sinfo->num_fun; ++i)
+		{
+		  if (sinfo->fun[i].start != NULL)
+		    {
+		      struct call_info *call = sinfo->fun[i].call_list;
+
+		      while (call != NULL)
+			{
+			  struct call_info *call_next = call->next;
+			  if (!insert_callee (sinfo->fun[i].start, call))
+			    free (call);
+			  call = call_next;
+			}
+		      sinfo->fun[i].call_list = NULL;
+		      sinfo->fun[i].non_root = TRUE;
+		    }
+		}
+	    }
+	}
+    }
+
+  /* Find the call graph root(s).  */
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	{
+	  struct _spu_elf_section_data *sec_data;
+	  struct spu_elf_stack_info *sinfo;
+
+	  if ((sec_data = spu_elf_section_data (sec)) != NULL
+	      && (sinfo = sec_data->stack_info) != NULL)
+	    {
+	      int i;
+	      for (i = 0; i < sinfo->num_fun; ++i)
+		if (!sinfo->fun[i].visit1)
+		  mark_non_root (&sinfo->fun[i]);
+	    }
+	}
+    }
+
+  /* Remove cycles from the call graph.  We start from the root node(s)
+     so that we break cycles in a reasonable place.  */
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	{
+	  struct _spu_elf_section_data *sec_data;
+	  struct spu_elf_stack_info *sinfo;
+
+	  if ((sec_data = spu_elf_section_data (sec)) != NULL
+	      && (sinfo = sec_data->stack_info) != NULL)
+	    {
+	      int i;
+	      for (i = 0; i < sinfo->num_fun; ++i)
+		if (!sinfo->fun[i].non_root)
+		  call_graph_traverse (&sinfo->fun[i], info);
+	    }
+	}
+    }
+
+  return TRUE;
+}
+
+/* Descend the call graph for FUN, accumulating total stack required.  */
+
+static bfd_vma
+sum_stack (struct function_info *fun,
+	   struct bfd_link_info *info,
+	   int emit_stack_syms)
+{
+  struct call_info *call;
+  struct function_info *max = NULL;
+  bfd_vma max_stack = fun->stack;
+  bfd_vma stack;
+  const char *f1;
+
+  if (fun->visit3)
+    return max_stack;
+
+  for (call = fun->call_list; call; call = call->next)
+    {
+      stack = sum_stack (call->fun, info, emit_stack_syms);
+      /* Include caller stack for normal calls, don't do so for
+	 tail calls.  fun->stack here is local stack usage for
+	 this function.  */
+      if (!call->is_tail)
+	stack += fun->stack;
+      if (max_stack < stack)
+	{
+	  max_stack = stack;
+	  max = call->fun;
+	}
+    }
+
+  f1 = func_name (fun);
+  info->callbacks->minfo (_("%s: 0x%v 0x%v\n"), f1, fun->stack, max_stack);
+
+  if (fun->call_list)
+    {
+      info->callbacks->minfo (_("  calls:\n"));
+      for (call = fun->call_list; call; call = call->next)
+	{
+	  const char *f2 = func_name (call->fun);
+	  const char *ann1 = call->fun == max ? "*" : " ";
+	  const char *ann2 = call->is_tail ? "t" : " ";
+
+	  info->callbacks->minfo (_("   %s%s %s\n"), ann1, ann2, f2);
+	}
+    }
+
+  /* Now fun->stack holds cumulative stack.  */
+  fun->stack = max_stack;
+  fun->visit3 = TRUE;
+
+  if (emit_stack_syms)
+    {
+      struct spu_link_hash_table *htab = spu_hash_table (info);
+      char *name = bfd_malloc (18 + strlen (f1));
+      struct elf_link_hash_entry *h;
+
+      if (name != NULL)
+	{
+	  if (fun->global || ELF_ST_BIND (fun->u.sym->st_info) == STB_GLOBAL)
+	    sprintf (name, "__stack_%s", f1);
+	  else
+	    sprintf (name, "__stack_%x_%s", fun->sec->id & 0xffffffff, f1);
+
+	  h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
+	  free (name);
+	  if (h != NULL
+	      && (h->root.type == bfd_link_hash_new
+		  || h->root.type == bfd_link_hash_undefined
+		  || h->root.type == bfd_link_hash_undefweak))
+	    {
+	      h->root.type = bfd_link_hash_defined;
+	      h->root.u.def.section = bfd_abs_section_ptr;
+	      h->root.u.def.value = max_stack;
+	      h->size = 0;
+	      h->type = 0;
+	      h->ref_regular = 1;
+	      h->def_regular = 1;
+	      h->ref_regular_nonweak = 1;
+	      h->forced_local = 1;
+	      h->non_elf = 0;
+	    }
+	}
+    }
+
+  return max_stack;
+}
+
+/* Provide an estimate of total stack required.  */
+
+static bfd_boolean
+spu_elf_stack_analysis (bfd *output_bfd,
+			struct bfd_link_info *info,
+			int emit_stack_syms)
+{
+  bfd *ibfd;
+  bfd_vma max_stack = 0;
+
+  if (!discover_functions (output_bfd, info))
+    return FALSE;
+
+  if (!build_call_tree (output_bfd, info))
+    return FALSE;
+
+  info->callbacks->info (_("Stack size for call graph root nodes.\n"));
+  info->callbacks->minfo (_("\nStack size for functions.  "
+			    "Annotations: '*' max stack, 't' tail call\n"));
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	{
+	  struct _spu_elf_section_data *sec_data;
+	  struct spu_elf_stack_info *sinfo;
+
+	  if ((sec_data = spu_elf_section_data (sec)) != NULL
+	      && (sinfo = sec_data->stack_info) != NULL)
+	    {
+	      int i;
+	      for (i = 0; i < sinfo->num_fun; ++i)
+		{
+		  if (!sinfo->fun[i].non_root)
+		    {
+		      bfd_vma stack;
+		      const char *f1;
+
+		      stack = sum_stack (&sinfo->fun[i], info,
+					 emit_stack_syms);
+		      f1 = func_name (&sinfo->fun[i]);
+		      info->callbacks->info (_("  %s: 0x%v\n"),
+					      f1, stack);
+		      if (max_stack < stack)
+			max_stack = stack;
+		    }
+		}
+	    }
+	}
+    }
+
+  info->callbacks->info (_("Maximum stack required is 0x%v\n"), max_stack);
+  return TRUE;
+}
+
+/* Perform a final link.  */
+
+static bfd_boolean
+spu_elf_final_link (bfd *output_bfd, struct bfd_link_info *info)
+{
+  struct spu_link_hash_table *htab = spu_hash_table (info);
+
+  if (htab->stack_analysis
+      && !spu_elf_stack_analysis (output_bfd, info, htab->emit_stack_syms))
+    info->callbacks->einfo ("%X%P: stack analysis error: %E\n");
+
+  return bfd_elf_final_link (output_bfd, info);
+}
+
 /* Apply RELOCS to CONTENTS of INPUT_SECTION from INPUT_BFD.  */
 
 static bfd_boolean
@@ -1459,6 +2690,7 @@ spu_elf_relocate_section (bfd *output_bf
       bfd_reloc_status_type r;
       bfd_boolean unresolved_reloc;
       bfd_boolean warned;
+      bfd_boolean branch;
 
       r_symndx = ELF32_R_SYM (rel->r_info);
       r_type = ELF32_R_TYPE (rel->r_info);
@@ -1514,8 +2746,9 @@ spu_elf_relocate_section (bfd *output_bf
       /* If this symbol is in an overlay area, we may need to relocate
 	 to the overlay stub.  */
       addend = rel->r_addend;
-      if (needs_ovl_stub (sym_name, sec, input_section, htab,
-			  is_branch (contents + rel->r_offset)))
+      branch = (is_branch (contents + rel->r_offset)
+		|| is_hint (contents + rel->r_offset));
+      if (needs_ovl_stub (sym_name, sec, input_section, htab, branch))
 	{
 	  char *stub_name;
 	  struct spu_stub_hash_entry *sh;
@@ -1883,5 +3116,6 @@ spu_elf_section_processing (bfd *abfd AT
 #define elf_backend_post_process_headers        spu_elf_post_process_headers
 #define elf_backend_section_processing		spu_elf_section_processing
 #define elf_backend_special_sections		spu_elf_special_sections
+#define bfd_elf32_bfd_final_link		spu_elf_final_link
 
 #include "elf32-target.h"
Index: bfd/elf32-spu.h
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.h,v
retrieving revision 1.2
diff -u -p -r1.2 elf32-spu.h
--- bfd/elf32-spu.h	27 Feb 2007 08:29:52 -0000	1.2
+++ bfd/elf32-spu.h	30 Apr 2007 13:17:41 -0000
@@ -20,10 +20,16 @@
 
 /* Extra info kept for SPU sections.  */
 
+struct spu_elf_stack_info;
+
 struct _spu_elf_section_data
 {
   struct bfd_elf_section_data elf;
 
+  /* Stack analysis info kept for this section.  */
+
+  struct spu_elf_stack_info *stack_info;
+
   /* Non-zero for overlay output sections.  */
   unsigned int ovl_index;
 };
@@ -41,9 +47,9 @@ extern void spu_elf_plugin (int);
 extern bfd_boolean spu_elf_open_builtin_lib (bfd **,
 					     const struct _ovl_stream *);
 extern bfd_boolean spu_elf_create_sections (bfd *,
-					    struct bfd_link_info *);
+					    struct bfd_link_info *, int, int);
 extern bfd_boolean spu_elf_find_overlays (bfd *, struct bfd_link_info *);
-extern bfd_boolean spu_elf_size_stubs (bfd *, struct bfd_link_info *, int,
+extern bfd_boolean spu_elf_size_stubs (bfd *, struct bfd_link_info *, int, int,
 				       asection **, asection **,
 				       asection **);
 extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int,
Index: include/bfdlink.h
===================================================================
RCS file: /cvs/src/src/include/bfdlink.h,v
retrieving revision 1.66
diff -u -p -r1.66 bfdlink.h
--- include/bfdlink.h	10 Apr 2007 18:00:25 -0000	1.66
+++ include/bfdlink.h	30 Apr 2007 13:18:05 -0000
@@ -455,8 +455,8 @@ struct bfd_link_info
   struct bfd_elf_dynamic_list *dynamic_list;
 };
 
-/* This structures holds a set of callback functions.  These are
-   called by the BFD linker routines.  Except for einfo, the first
+/* This structure holds a set of callback functions.  These are called
+   by the BFD linker routines.  Except for the info functions, the first
    argument to each callback function is the bfd_link_info structure
    being used and each function returns a boolean value.  If the
    function returns FALSE, then the BFD function which called it should
@@ -568,9 +568,15 @@ struct bfd_link_callbacks
   bfd_boolean (*notice)
     (struct bfd_link_info *, const char *name,
      bfd *abfd, asection *section, bfd_vma address);
-  /* General link info message.  */
+  /* Error or warning link info message.  */
   void (*einfo)
     (const char *fmt, ...);
+  /* General link info message.  */
+  void (*info)
+    (const char *fmt, ...);
+  /* Message to be printed in linker map file.  */
+  void (*minfo)
+    (const char *fmt, ...);
   /* This callback provides a chance for users of the BFD library to
      override its decision about whether to place two adjacent sections
      into the same segment.  */
Index: ld/gen-doc.texi
===================================================================
RCS file: /cvs/src/src/ld/gen-doc.texi,v
retrieving revision 1.8
diff -u -p -r1.8 gen-doc.texi
--- ld/gen-doc.texi	5 May 2006 00:51:37 -0000	1.8
+++ ld/gen-doc.texi	30 Apr 2007 13:18:06 -0000
@@ -3,14 +3,17 @@
 @set GENERIC
 
 @c 2. Specific target machines
-@set H8300
-@set I960
 @set ARM
+@set H8300
 @set HPPA
+@set I960
+@set M68HC11
 @set MMIX
 @set MSP430
 @set POWERPC
 @set POWERPC64
+@set Renesas
+@set SPU
 @set TICOFF
 @set WIN32
 @set XTENSA
Index: ld/ld.texinfo
===================================================================
RCS file: /cvs/src/src/ld/ld.texinfo,v
retrieving revision 1.192
diff -u -p -r1.192 ld.texinfo
--- ld/ld.texinfo	29 Mar 2007 17:16:05 -0000	1.192
+++ ld/ld.texinfo	30 Apr 2007 13:18:12 -0000
@@ -20,33 +20,18 @@
 @c Configure for the generation of man pages
 @set UsesEnvVars
 @set GENERIC
-@set ARC
 @set ARM
-@set D10V
-@set D30V
-@set H8/300
-@set H8/500
+@set H8300
 @set HPPA
-@set I370
-@set I80386
-@set I860
 @set I960
-@set M32R
 @set M68HC11
-@set M680X0
-@set MCORE
-@set MIPS
 @set MMIX
 @set MSP430
-@set PDP11
-@set PJ
 @set POWERPC
 @set POWERPC64
-@set SH
-@set SPARC
-@set TIC54X
-@set V850
-@set VAX
+@set Renesas
+@set SPU
+@set TICOFF
 @set WIN32
 @set XTENSA
 @end ifset
@@ -175,6 +160,9 @@ section entitled ``GNU Free Documentatio
 @ifset POWERPC64
 * PowerPC64 ELF64::             ld and PowerPC64 64-bit ELF Support
 @end ifset
+@ifset SPU
+* SPU ELF::			ld and SPU ELF Support
+@end ifset
 @ifset TICOFF
 * TI COFF::                     ld and the TI COFF
 @end ifset
@@ -5235,6 +5223,9 @@ functionality are not listed.
 @ifset POWERPC64
 * PowerPC64 ELF64::		@command{ld} and PowerPC64 64-bit ELF Support
 @end ifset
+@ifset SPU
+* SPU ELF::			@command{ld} and SPU ELF Support
+@end ifset
 @ifset TICOFF
 * TI COFF::                     @command{ld} and TI COFF
 @end ifset
@@ -5849,6 +5840,87 @@ Use this option to turn off this feature
 @end ifclear
 @end ifset
 
+@ifset SPU
+@ifclear GENERIC
+@raisesections
+@end ifclear
+
+@node SPU ELF
+@section @command{ld} and SPU ELF Support
+
+@cindex SPU ELF options
+@table @option
+
+@cindex SPU plugins
+@kindex --plugin
+@item --plugin
+This option marks an executable as a PIC plugin module.
+
+@cindex SPU overlays
+@kindex --no-overlays
+@item --no-overlays
+Normally, @command{ld} recognizes calls to functions within overlay
+regions, and redirects such calls to an overlay manager via a stub.
+@command{ld} also provides a built-in overlay manager.  This option
+turns off all this special overlay handling.
+
+@cindex SPU overlay stub symbols
+@kindex --emit-stub-syms
+@item --emit-stub-syms
+This option causes @command{ld} to label overlay stubs with a local
+symbol that encodes the stub type and destination.
+
+@cindex SPU extra overlay stubs
+@kindex --extra-overlay-stubs
+@item --extra-overlay-stubs
+This option causes @command{ld} to add overlay call stubs on all
+function calls out of overlay regions.  Normally stubs are not added
+on calls to non-overlay regions.
+
+@cindex SPU local store size
+@kindex --local-store=lo:hi
+@item --local-store=lo:hi
+@command{ld} usually checks that a final executable for SPU fits in
+the address range 0 to 256k.  This option may be used to change the
+range.  Disable the check entirely with @option{--local-store=0:0}.
+
+@cindex SPU stack analysis
+@kindex --stack-analysis
+@item --stack-analysis
+SPU local store space is limited.  Over-allocation of stack space
+unnecessarily limits space available for code and data, while
+under-allocation results in runtime failures.  If given this option,
+@command{ld} will provide an estimate of maximum stack usage.
+@command{ld} does this by examining symbols in code sections to
+determine the extents of functions, and looking at function prologues
+for stack adjusting instructions.  A call-graph is created by looking
+for relocations on branch instructions.  The graph is then searched
+for the maximum stack usage path.  Note that this analysis does not
+find calls made via function pointers, and does not handle recursion
+and other cycles in the call graph.  Stack usage may be
+under-estimated if your code makes such calls.  Also, stack usage for
+dynamic allocation, e.g. alloca, will not be detected.  If a link map
+is requested, detailed information about each function's stack usage
+and calls will be given.
+
+@cindex SPU stack symbols
+@kindex --emit-stack-syms
+@item --emit-stack-syms
+This option, if given along with @option{--stack-analysis}, will result
+in @command{ld} emitting stack sizing symbols for each function.
+These take the form @code{__stack_<function_name>} for global
+functions, and @code{__stack_<number>_<function_name>} for static
+functions.  @code{<number>} is the section id in hex.  The value of
+such symbols is the stack requirement for the corresponding function.
+The symbol size will be zero, type @code{STT_NOTYPE}, binding
+@code{STB_LOCAL}, and section @code{SHN_ABS}. 
+@end table
+
+@ifclear GENERIC
+@lowersections
+@end ifclear
+@end ifset
+
 @ifset TICOFF
 @ifclear GENERIC
 @raisesections
Index: ld/ldmain.c
===================================================================
RCS file: /cvs/src/src/ld/ldmain.c,v
retrieving revision 1.120
diff -u -p -r1.120 ldmain.c
--- ld/ldmain.c	26 Apr 2007 14:46:59 -0000	1.120
+++ ld/ldmain.c	30 Apr 2007 13:18:12 -0000
@@ -161,6 +161,8 @@ static struct bfd_link_callbacks link_ca
   unattached_reloc,
   notice,
   einfo,
+  info_msg,
+  minfo,
   ldlang_override_segment_assignment
 };
 
Index: ld/ldmisc.c
===================================================================
RCS file: /cvs/src/src/ld/ldmisc.c,v
retrieving revision 1.32
diff -u -p -r1.32 ldmisc.c
--- ld/ldmisc.c	26 Apr 2007 14:46:59 -0000	1.32
+++ ld/ldmisc.c	30 Apr 2007 13:18:13 -0000
@@ -477,11 +477,14 @@ info_assert (const char *file, unsigned 
 void
 minfo (const char *fmt, ...)
 {
-  va_list arg;
-
-  va_start (arg, fmt);
-  vfinfo (config.map_file, fmt, arg, FALSE);
-  va_end (arg);
+  if (config.map_file != NULL)
+    {
+      va_list arg;
+
+      va_start (arg, fmt);
+      vfinfo (config.map_file, fmt, arg, FALSE);
+      va_end (arg);
+    }
 }
 
 void
Index: ld/emultempl/spuelf.em
===================================================================
RCS file: /cvs/src/src/ld/emultempl/spuelf.em,v
retrieving revision 1.4
diff -u -p -r1.4 spuelf.em
--- ld/emultempl/spuelf.em	26 Mar 2007 11:10:44 -0000	1.4
+++ ld/emultempl/spuelf.em	30 Apr 2007 13:18:14 -0000
@@ -34,6 +34,12 @@ static int non_overlay_stubs = 0;
 /* Whether to emit symbols for stubs.  */
 static int emit_stub_syms = 0;
 
+/* Non-zero to perform stack space analysis.  */
+static int stack_analysis = 0;
+
+/* Whether to emit symbols with stack requirements for each function.  */
+static int emit_stack_syms = 0;
+
 /* Range of valid addresses for loadable sections.  */
 static bfd_vma local_store_lo = 0;
 static bfd_vma local_store_hi = 0x3ffff;
@@ -70,7 +76,8 @@ spu_after_open (void)
   if (is_spu_target ()
       && !link_info.relocatable
       && link_info.input_bfds != NULL
-      && !spu_elf_create_sections (output_bfd, &link_info))
+      && !spu_elf_create_sections (output_bfd, &link_info,
+				   stack_analysis, emit_stack_syms))
     einfo ("%X%P: can not create note section: %E\n");
 
   gld${EMULATION_NAME}_after_open ();
@@ -187,7 +194,7 @@ spu_before_allocation (void)
 	  asection *stub, *ovtab;
 
 	  if (!spu_elf_size_stubs (output_bfd, &link_info, non_overlay_stubs,
-				   &stub, &ovtab, &toe))
+				   stack_analysis, &stub, &ovtab, &toe))
 	    einfo ("%X%P: can not size overlay stubs: %E\n");
 
 	  if (stub != NULL)
@@ -396,6 +403,8 @@ PARSE_AND_LIST_PROLOGUE='
 #define OPTION_SPU_STUB_SYMS		(OPTION_SPU_NO_OVERLAYS + 1)
 #define OPTION_SPU_NON_OVERLAY_STUBS	(OPTION_SPU_STUB_SYMS + 1)
 #define OPTION_SPU_LOCAL_STORE		(OPTION_SPU_NON_OVERLAY_STUBS + 1)
+#define OPTION_SPU_STACK_ANALYSIS	(OPTION_SPU_LOCAL_STORE + 1)
+#define OPTION_SPU_STACK_SYMS		(OPTION_SPU_STACK_ANALYSIS + 1)
 '
 
 PARSE_AND_LIST_LONGOPTS='
@@ -404,6 +413,8 @@ PARSE_AND_LIST_LONGOPTS='
   { "emit-stub-syms", no_argument, NULL, OPTION_SPU_STUB_SYMS },
   { "extra-overlay-stubs", no_argument, NULL, OPTION_SPU_NON_OVERLAY_STUBS },
   { "local-store", required_argument, NULL, OPTION_SPU_LOCAL_STORE },
+  { "stack-analysis", no_argument, NULL, OPTION_SPU_STACK_ANALYSIS },
+  { "emit-stack-syms", no_argument, NULL, OPTION_SPU_STACK_SYMS },
 '
 
 PARSE_AND_LIST_OPTIONS='
@@ -412,7 +423,9 @@ PARSE_AND_LIST_OPTIONS='
   --no-overlays         No overlay handling.\n\
   --emit-stub-syms      Add symbols on overlay call stubs.\n\
   --extra-overlay-stubs Add stubs on all calls out of overlay regions.\n\
-  --local-store=lo:hi   Valid address range.\n"
+  --local-store=lo:hi   Valid address range.\n\
+  --stack-analysis      Estimate maximum stack requirement.\n\
+  --emit-stack-syms     Add __stack_func giving stack needed for each func.\n"
 		   ));
 '
 
@@ -446,6 +459,14 @@ PARSE_AND_LIST_ARGS_CASES='
 	einfo (_("%P%F: invalid --local-store address range `%s'\''\n"), optarg);
       }
       break;
+
+    case OPTION_SPU_STACK_ANALYSIS:
+      stack_analysis = 1;
+      break;
+
+    case OPTION_SPU_STACK_SYMS:
+      emit_stack_syms = 1;
+      break;
 '
 
 LDEMUL_AFTER_OPEN=spu_after_open

-- 
Alan Modra
IBM OzLabs - Linux Technology Centre


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]