This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch/rfc][pr17902] GC unused parts of mergeable sections


And now that all the refactoring is in, here is a patch with just the GC.

The size savings are still the same: chromium's .rodata goes from
6985174 to 6851542 bytes.

The link time with --gc-secitons -O1 (probably the setup that makes
the cost of this patch most evident) goes from

6.422498558 seconds time elapsed
   ( +-  0.37% )

to

6.808367419 seconds time elapsed
   ( +-  0.22% )

What should be done for testing? Is it OK to write tests in assembly?
It is somewhat brittle to try to convince a C compiler to output a
dead string in a small test.

I will be cleaning up and trying to optimize this patch, but it would
be really nice to get feedback on whether the general idea is
reasonable. It is: if doing gc, use the gc pass to record which
offsets of a given SHF_MERGE section are used. That is stored with
dummy entries in the same maps that are use to merge the contents.

During regular SHF_MERGE processing, only include entries that are
already in the map.

Cheers,
Rafael
diff --git a/gold/gc.cc b/gold/gc.cc
index 843b2b8..2bf6472 100644
--- a/gold/gc.cc
+++ b/gold/gc.cc
@@ -25,10 +25,45 @@
 #include "object.h"
 #include "gc.h"
 #include "symtab.h"
+#include "merge.h"
 
 namespace gold
 {
 
+void Worklist::just_push(const Section_id &val) { this->work_list_.push(val); }
+
+void Worklist::push(const Section_id &val) {
+  this->work_list_.push(val);
+  Object *obj = val.first;
+  unsigned int shndx = val.second;
+  uint64_t flags = obj->section_flags(shndx);
+  gold_assert((flags & elfcpp::SHF_MERGE) == 0);
+}
+
+static void
+add_referenced_offset(Object *obj, unsigned int shndx, uint64_t offset) {
+  // FIXME: what is the safe way to do this cast?
+  Relobj* r_obj = static_cast<Relobj*>(obj);
+  Object_merge_map* object_merge_map = r_obj->get_or_create_merge_map();
+
+  // In the case of strings, findind the start and length  requires reading the
+  // contents, which requires decompression. To do that only once,
+  // we record only the reference offset here and computed the start and length
+  // do_add_input_section.
+  object_merge_map->add_reference(shndx, offset);
+}
+
+void Worklist::push(const Section_id &val, uint64_t offset) {
+  this->work_list_.push(val);
+  Object *obj = val.first;
+  unsigned int shndx = val.second;
+  uint64_t flags = obj->section_flags(shndx);
+  if ((flags & elfcpp::SHF_MERGE) == 0)
+    return;
+
+  add_referenced_offset(obj, shndx, offset);
+}
+
 // Garbage collection uses a worklist style algorithm to determine the 
 // transitive closure of all referenced sections.
 void 
@@ -63,11 +98,25 @@ Garbage_collection::do_transitive_closure()
           if (this->referenced_list().find(*it_v)
               == this->referenced_list().end())
             {
-              this->worklist().push(*it_v);   
+              this->worklist().just_push(*it_v);   
             }
         }
     }
   this->worklist_ready();
+  for (Sections_reachable::iterator I = this->referenced_list().begin(),
+                                    E = this->referenced_list().end();
+       I != E; ++I) {
+    Atom_reachable &atoms = this->atom_reloc_map()[*I];
+    for (Atom_reachable::iterator I2 = atoms.begin(), E2 = atoms.end();
+         I2 != E2; ++I2) {
+      const Atom &atom = *I2;
+      const Section_id id = atom.first;
+      Object* obj = id.first;
+      unsigned int sec_num = id.second;
+      uint64_t new_offset = atom.second;
+      add_referenced_offset(obj, sec_num, new_offset);
+    }
+  }
 }
 
 } // End namespace gold.
diff --git a/gold/gc.h b/gold/gc.h
index be4a63c..9bc0a1d 100644
--- a/gold/gc.h
+++ b/gold/gc.h
@@ -46,18 +46,49 @@ class Output_section;
 class General_options;
 class Layout;
 
+class Worklist
+{
+ public:
+  bool empty() const {
+    return work_list_.empty();
+  }
+  Section_id &front() {
+    return work_list_.front();
+  }
+  void pop() {
+    work_list_.pop();
+  }
+  void just_push(const Section_id& val);
+  void push(const Section_id& val);
+  void push(const Section_id& val, uint64_t offset);
+
+ private:
+  typedef std::queue<Section_id> Worklist_type;
+  Worklist_type work_list_;
+};
+
 class Garbage_collection
 {
  public:
 
   typedef Unordered_set<Section_id, Section_id_hash> Sections_reachable;
   typedef std::map<Section_id, Sections_reachable> Section_ref;
-  typedef std::queue<Section_id> Worklist_type;
+
   // This maps the name of the section which can be represented as a C
   // identifier (cident) to the list of sections that have that name.
   // Different object files can have cident sections with the same name.
   typedef std::map<std::string, Sections_reachable> Cident_section_map;
 
+  typedef std::pair<Section_id, uint64_t> Atom;
+  struct Atom_hash {
+    size_t operator()(const Atom &a) const {
+      const Section_id &s = a.first;
+      return reinterpret_cast<uintptr_t>(s.first) ^ s.second ^ a.second;
+    }
+  };
+  typedef Unordered_set<Atom, Atom_hash> Atom_reachable;
+  typedef std::map<Section_id, Atom_reachable> Atom_ref;
+
   Garbage_collection()
   : is_worklist_ready_(false)
   { }
@@ -72,7 +103,11 @@ class Garbage_collection
   section_reloc_map()
   { return this->section_reloc_map_; }
 
-  Worklist_type&
+  Atom_ref&
+  atom_reloc_map()
+  { return this->atom_reloc_map_; }
+
+  Worklist&
   worklist()
   { return this->work_list_; }
 
@@ -113,13 +148,26 @@ class Garbage_collection
     reachable.insert(dst_id);
   }
 
+  void add_reference_to_merge_section(Object *src_object,
+                                      unsigned int src_shndx,
+                                      Object *dst_object,
+                                      unsigned int dst_shndx, uint64_t offset) {
+    add_reference(src_object, src_shndx, dst_object, dst_shndx);
+    Section_id src_id(src_object, src_shndx);
+    Section_id dst_id(dst_object, dst_shndx);
+    Atom atom(dst_id, offset);
+    this->atom_reloc_map_[src_id].insert(atom);
+  }
+
  private:
 
-  Worklist_type work_list_;
+  Worklist work_list_;
   bool is_worklist_ready_;
   Section_ref section_reloc_map_;
   Sections_reachable referenced_list_;
   Cident_section_map cident_sections_;
+
+  Atom_ref atom_reloc_map_;
 };
 
 // Data to pass between successive invocations of do_layout
@@ -235,6 +283,10 @@ gc_process_relocs(
       typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
       Address dst_off;
 
+      // If the relocation points to a section, this includes the addend.
+      // Otherwise it doesn't.
+      Address gc_ref_off;
+
       if (r_sym < local_count)
         {
           gold_assert(plocal_syms != NULL);
@@ -244,7 +296,10 @@ gc_process_relocs(
           bool is_ordinary;
 	  dst_indx = src_obj->adjust_sym_shndx(r_sym, dst_indx, &is_ordinary);
           dst_obj = src_obj;
-	  dst_off = lsym.get_st_value() + addend;
+	  gc_ref_off = dst_off = lsym.get_st_value();
+	  dst_off += addend;
+	  if (lsym.get_st_type() == elfcpp::STT_SECTION)
+	    gc_ref_off += addend;
 
           if (is_icf_tracked)
             {
@@ -296,6 +351,7 @@ gc_process_relocs(
               dst_indx = gsym->shndx(&is_ordinary);
             }
 	  dst_off = static_cast<const Sized_symbol<size>*>(gsym)->value();
+	  gc_ref_off = dst_off;
 	  dst_off += addend;
 
 	  // When doing safe folding, check to see if this relocation is that
@@ -348,7 +404,14 @@ gc_process_relocs(
         }
       if (parameters->options().gc_sections())
         {
-	  symtab->gc()->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+	  Garbage_collection* gc = symtab->gc();
+	  uint64_t dst_flags = dst_obj->section_flags(dst_indx);
+	  if (dst_flags & elfcpp::SHF_MERGE)
+	    gc->add_reference_to_merge_section(src_obj, src_indx, dst_obj,
+					       dst_indx, gc_ref_off);
+	  else
+	    gc->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+
 	  parameters->sized_target<size, big_endian>()
 	    ->gc_add_reference(symtab, src_obj, src_indx,
 			       dst_obj, dst_indx, dst_off);
diff --git a/gold/merge.cc b/gold/merge.cc
index d395312..b8688e6 100644
--- a/gold/merge.cc
+++ b/gold/merge.cc
@@ -33,6 +33,33 @@ namespace gold
 
 // Class Object_merge_map.
 
+void
+Object_merge_map::Input_merge_map::add_reference(
+    section_offset_type input_offset) {
+  this->sorted = false;
+  Input_merge_entry entry;
+  entry.input_offset = input_offset;
+  entry.length = -1;
+  entry.output_offset = -1;
+  this->entries.push_back(entry);
+}
+
+void
+Object_merge_map::Input_merge_map::ensure_sorted() {
+  if (this->sorted)
+    return;
+  std::sort(this->entries.begin(), this->entries.end(), Input_merge_compare());
+  this->sorted = true;
+}
+
+void
+Object_merge_map::Input_merge_map::unique() {
+  ensure_sorted();
+  Entries::iterator I = std::unique(this->entries.begin(), this->entries.end(),
+                                    Input_merge_same_offset());
+  entries.erase(I, entries.end());
+}
+
 // Destructor.
 
 Object_merge_map::~Object_merge_map()
@@ -69,7 +96,11 @@ Object_merge_map::get_or_make_input_merge_map(
     {
       // For a given input section in a given object, every mapping
       // must be done with the same Merge_map.
-      gold_assert(map->output_data == output_data);
+      // FIXME: comment
+      if (map->output_data == NULL)
+        map->output_data = output_data;
+      else
+        gold_assert(map->output_data == output_data);
       return map;
     }
 
@@ -145,6 +176,12 @@ Object_merge_map::Input_merge_map::add_mapping(
   this->entries.push_back(entry);
 }
 
+void Object_merge_map::add_reference(unsigned int shndx,
+                                     section_offset_type input_offset) {
+  Input_merge_map* map = this->get_or_make_input_merge_map(NULL, shndx);
+  map->add_reference(input_offset);
+}
+
 // Get the output offset for an input address.
 
 bool
@@ -156,12 +193,7 @@ Object_merge_map::get_output_offset(unsigned int shndx,
   if (map == NULL)
     return false;
 
-  if (!map->sorted)
-    {
-      std::sort(map->entries.begin(), map->entries.end(),
-		Input_merge_compare());
-      map->sorted = true;
-    }
+  map->ensure_sorted();
 
   Input_merge_entry entry;
   entry.input_offset = input_offset;
@@ -366,9 +398,42 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
   Object_merge_map* merge_map = object->get_or_create_merge_map();
   Object_merge_map::Input_merge_map* input_merge_map =
     merge_map->get_or_make_input_merge_map(this, shndx);
+  uint64_t flags = object->section_flags(shndx);
+
+  bool gc = parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+  Object_merge_map::Input_merge_map::Entries &entries =
+      input_merge_map->entries;
+
+  if (gc) {
+    for (Object_merge_map::Input_merge_map::Entries::iterator
+             I = entries.begin(),
+             E = entries.end();
+         I != E; ++I) {
+      I->input_offset &= ~(entsize - 1);
+      I->length = entsize;
+    }
+
+    input_merge_map->unique();
+  }
+
+  Object_merge_map::Input_merge_map::Entries::iterator entry_i =
+    entries.begin();
+  Object_merge_map::Input_merge_map::Entries::iterator entry_e =
+    entries.end();
 
-  for (section_size_type i = 0; i < len; i += entsize, p += entsize)
+  section_offset_type len_u = len;
+  for (section_offset_type i = 0; i < len_u; i += entsize, p += entsize)
     {
+      if (gc)
+        {
+          while (entry_i != entry_e && entry_i->input_offset < i)
+            ++entry_i;
+          if (entry_i == entry_e)
+            break;
+          if (entry_i->input_offset != i)
+            continue;
+        }
+
       // Add the constant to the section contents.  If we find that it
       // is already in the hash table, we will remove it again.
       Merge_data_key k = this->len_;
@@ -385,7 +450,15 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
 	}
 
       // Record the offset of this constant in the output section.
-      input_merge_map->add_mapping(i, entsize, k);
+      if (gc)
+        {
+          entry_i->length = entsize;
+          entry_i->output_offset = k;
+        }
+      else
+        {
+          input_merge_map->add_mapping(i, entsize, k);
+        }
     }
 
   // For script processing, we keep the input sections.
@@ -500,6 +573,31 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
     ++count;
   merged_strings.reserve(count + 1);
 
+  Object_merge_map* merge_map = object->get_or_create_merge_map();
+  Object_merge_map::Input_merge_map* input_merge_map =
+    merge_map->get_or_make_input_merge_map(this, shndx);
+
+  uint64_t flags = object->section_flags(shndx);
+  bool gc = parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+  Object_merge_map::Input_merge_map::Entries &entries =
+      input_merge_map->entries;
+
+  if (gc) {
+    for (Object_merge_map::Input_merge_map::Entries::iterator
+             I = entries.begin(),
+             E = entries.end();
+         I != E; ++I) {
+      section_offset_type start_offset = I->input_offset;
+      while (start_offset > 0 && pdata[start_offset - 1] != 0)
+        --start_offset;
+      I->input_offset = start_offset;
+
+      I->length = (string_length(&pdata[start_offset]) + 1) * sizeof(Char_type);
+    }
+
+    input_merge_map->unique();
+  }
+
   // The index I is in bytes, not characters.
   section_size_type i = 0;
 
@@ -520,10 +618,12 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
 	      != init_align_modulo))
 	  has_misaligned_strings = true;
 
-      Stringpool::Key key;
-      this->stringpool_.add_with_length(p, len, true, &key);
+      if (object->is_offset_referenced(shndx, i)) {
+        Stringpool::Key key;
+        this->stringpool_.add_with_length(p, len, true, &key);
 
-      merged_strings.push_back(Merged_string(i, key));
+        merged_strings.push_back(Merged_string(i, key));
+      }
       p += len + 1;
       i += (len + 1) * sizeof(Char_type);
     }
@@ -569,8 +669,20 @@ Output_merge_string<Char_type>::finalize_merged_data()
       section_offset_type last_output_offset = 0;
       Relobj *object = (*l)->object;
       Object_merge_map* merge_map = object->get_or_create_merge_map();
+      unsigned shndx = (*l)->shndx;
       Object_merge_map::Input_merge_map* input_merge_map =
-        merge_map->get_or_make_input_merge_map(this, (*l)->shndx);
+        merge_map->get_or_make_input_merge_map(this, shndx);
+
+      uint64_t flags = object->section_flags(shndx);
+      bool gc =
+          parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+      Object_merge_map::Input_merge_map::Entries &entries =
+        input_merge_map->entries;
+
+      Object_merge_map::Input_merge_map::Entries::iterator entry_i =
+        entries.begin();
+      Object_merge_map::Input_merge_map::Entries::iterator entry_e =
+        entries.end();
 
       for (typename Merged_strings::const_iterator p =
 	     (*l)->merged_strings.begin();
@@ -578,10 +690,21 @@ Output_merge_string<Char_type>::finalize_merged_data()
 	   ++p)
 	{
 	  section_size_type length = p->offset - last_input_offset;
-	  if (length > 0)
-	    input_merge_map->add_mapping(last_input_offset, length,
-                                         last_output_offset);
-	  last_input_offset = p->offset;
+	  if (length > 0) {
+	    if (gc) {
+              while (entry_i != entry_e &&
+                     entry_i->input_offset < last_input_offset)
+                ++entry_i;
+	      if (entry_i == entry_e)
+		break;
+	      if (entry_i->input_offset == last_input_offset)
+		entry_i->output_offset = last_output_offset;
+	    } else {
+	      input_merge_map->add_mapping(last_input_offset, length,
+					   last_output_offset);
+	    }
+	  }
+          last_input_offset = p->offset;
 	  if (p->stringpool_key != 0)
 	    last_output_offset =
 	        this->stringpool_.get_offset_from_key(p->stringpool_key);
diff --git a/gold/merge.h b/gold/merge.h
index 54caed8..3c68a5c 100644
--- a/gold/merge.h
+++ b/gold/merge.h
@@ -59,6 +59,9 @@ class Object_merge_map
               section_offset_type offset, section_size_type length,
               section_offset_type output_offset);
 
+  void
+  add_reference(unsigned int shndx, section_offset_type offset);
+
   // Get the output offset for an input address.  MERGE_MAP is the map
   // we are looking for, or NULL if we don't care.  The input address
   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
@@ -104,6 +107,9 @@ class Object_merge_map
     void add_mapping(section_offset_type input_offset, section_size_type length,
                      section_offset_type output_offset);
 
+    void
+    add_reference(section_offset_type input_offset);
+
     typedef std::vector<Input_merge_entry> Entries;
 
     // We store these with the Relobj, and we look them up by input
@@ -134,6 +140,9 @@ class Object_merge_map
     Input_merge_map()
       : output_data(NULL), entries(), sorted(true)
     { }
+
+    void unique();
+    void ensure_sorted();
   };
 
   // Get or make the Input_merge_map to use for the section SHNDX
@@ -151,6 +160,13 @@ class Object_merge_map
     { return i1.input_offset < i2.input_offset; }
   };
 
+  struct Input_merge_same_offset
+  {
+    bool
+    operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
+    { return i1.input_offset == i2.input_offset; }
+  };
+
   // Map input section indices to merge maps.
   typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
 
diff --git a/gold/object.cc b/gold/object.cc
index f28305b..8a3b040 100644
--- a/gold/object.cc
+++ b/gold/object.cc
@@ -406,6 +406,23 @@ Relobj::finalize_incremental_relocs(Layout* layout, bool clear_counts)
   layout->incremental_inputs()->set_reloc_count(rindex);
 }
 
+bool Relobj::is_offset_referenced(unsigned int shndx, uint64_t offset) {
+  if (!parameters->options().gc_sections())
+    return true;
+
+  uint64_t flags = this->section_flags(shndx);
+  // We don't gc non alloc sections.
+  if ((flags & elfcpp::SHF_ALLOC) == 0)
+    return true;
+
+  // Only SHF_MERGE sections are partially gced.
+  if ((flags & elfcpp::SHF_MERGE) == 0)
+    return true;
+
+  section_offset_type output_offset;
+  return this->merge_output_offset(shndx, offset, &output_offset);
+}
+
 Object_merge_map*
 Relobj::get_or_create_merge_map()
 {
@@ -2205,7 +2222,9 @@ Sized_relobj_file<size, big_endian>::do_count_local_symbols(Stringpool* pool,
 
       // Decide whether this symbol should go into the output file.
 
-      if ((shndx < shnum && out_sections[shndx] == NULL)
+      if ((shndx < shnum
+	   && (out_sections[shndx] == NULL ||
+	       !this->is_offset_referenced(shndx, sym.get_st_value())))
 	  || shndx == this->discarded_eh_frame_shndx_)
 	{
 	  lv.set_no_output_symtab_entry();
@@ -2378,6 +2397,9 @@ Sized_relobj_file<size, big_endian>::compute_final_local_value_internal(
 	    }
 	  else if (!lv_in->is_section_symbol())
 	    {
+              if (!this->is_offset_referenced(shndx, lv_in->input_value()))
+                return This::CFLV_DISCARDED;
+
 	      // This is not a section symbol.  We can determine
 	      // the final value now.
 	      lv_out->set_output_value(
diff --git a/gold/object.h b/gold/object.h
index fc93abd..1ee1e2d 100644
--- a/gold/object.h
+++ b/gold/object.h
@@ -338,6 +338,8 @@ class Object
 {
  public:
   typedef std::vector<Symbol*> Symbols;
+  typedef std::vector<uint64_t> Offsets_reachable;
+  typedef std::map<unsigned int, Offsets_reachable> Offset_ref;
 
   // NAME is the name of the object as we would report it to the user
   // (e.g., libfoo.a(bar.o) if this is in an archive.  INPUT_FILE is
@@ -1280,6 +1282,9 @@ class Relobj : public Object
   is_big_endian() const
   { return this->do_is_big_endian(); }
 
+  bool
+  is_offset_referenced(unsigned int shndx, uint64_t offset);
+
  protected:
   // The output section to be used for each input section, indexed by
   // the input section number.  The output section is NULL if the
diff --git a/gold/symtab.cc b/gold/symtab.cc
index 045327a..435ee5d 100644
--- a/gold/symtab.cc
+++ b/gold/symtab.cc
@@ -637,8 +637,9 @@ Symbol_table::gc_mark_undef_symbols(Layout* layout)
     }
 }
 
+template<int size>
 void
-Symbol_table::gc_mark_symbol(Symbol* sym)
+Symbol_table::gc_mark_sized_symbol(Sized_symbol<size>* sym)
 {
   // Add the object and section to the work list.
   bool is_ordinary;
@@ -646,11 +647,22 @@ Symbol_table::gc_mark_symbol(Symbol* sym)
   if (is_ordinary && shndx != elfcpp::SHN_UNDEF)
     {
       gold_assert(this->gc_!= NULL);
-      this->gc_->worklist().push(Section_id(sym->object(), shndx));
+      this->gc_->worklist().push(Section_id(sym->object(), shndx),
+                                 sym->value());
     }
   parameters->target().gc_mark_symbol(this, sym);
 }
 
+void
+Symbol_table::gc_mark_symbol(Symbol* sym)
+{
+  const int size = parameters->target().get_size();
+  if (size == 32)
+    gc_mark_sized_symbol(this->get_sized_symbol<32>(sym));
+  else
+    gc_mark_sized_symbol(this->get_sized_symbol<64>(sym));
+}
+
 // When doing garbage collection, keep symbols that have been seen in
 // dynamic objects.
 inline void 
diff --git a/gold/symtab.h b/gold/symtab.h
index 9413360..1d4a2e8 100644
--- a/gold/symtab.h
+++ b/gold/symtab.h
@@ -1385,6 +1385,10 @@ class Symbol_table
   gc_mark_undef_symbols(Layout*);
 
   // This tells garbage collection that this symbol is referenced.
+  template<int size>
+  void
+  gc_mark_sized_symbol(Sized_symbol<size>* sym);
+
   void
   gc_mark_symbol(Symbol* sym);
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]