This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
Re: [patch/rfc][pr17902] GC unused parts of mergeable sections
- From: Rafael EspÃndola <rafael dot espindola at gmail dot com>
- To: Binutils <binutils at sourceware dot org>
- Cc: Cary Coutant <ccoutant at google dot com>, Nico Weber <thakis at chromium dot org>
- Date: Tue, 31 Mar 2015 14:16:13 -0400
- Subject: Re: [patch/rfc][pr17902] GC unused parts of mergeable sections
- Authentication-results: sourceware.org; auth=none
- References: <CAG3jReJsh2pXteaHkE17r0WGSOwH6gWniX9y2K2nJV-zfAxDwQ at mail dot gmail dot com> <CAG3jReJ_aV54dXENEjDt_YXhuqSTibryT0zC7HTiv2Sh=jqnCA at mail dot gmail dot com> <CAG3jReLcHButGB8dShJ4S1H1ZgjYkZ9xShpB1Kcz9RnBmz9UQA at mail dot gmail dot com> <CAG3jReJh10hH6mLeTHersEyr+e=hnHBYvL6nXxV9YMB6Uu75Sg at mail dot gmail dot com>
And now that all the refactoring is in, here is a patch with just the GC.
The size savings are still the same: chromium's .rodata goes from
6985174 to 6851542 bytes.
The link time with --gc-secitons -O1 (probably the setup that makes
the cost of this patch most evident) goes from
6.422498558 seconds time elapsed
( +- 0.37% )
to
6.808367419 seconds time elapsed
( +- 0.22% )
What should be done for testing? Is it OK to write tests in assembly?
It is somewhat brittle to try to convince a C compiler to output a
dead string in a small test.
I will be cleaning up and trying to optimize this patch, but it would
be really nice to get feedback on whether the general idea is
reasonable. It is: if doing gc, use the gc pass to record which
offsets of a given SHF_MERGE section are used. That is stored with
dummy entries in the same maps that are use to merge the contents.
During regular SHF_MERGE processing, only include entries that are
already in the map.
Cheers,
Rafael
diff --git a/gold/gc.cc b/gold/gc.cc
index 843b2b8..2bf6472 100644
--- a/gold/gc.cc
+++ b/gold/gc.cc
@@ -25,10 +25,45 @@
#include "object.h"
#include "gc.h"
#include "symtab.h"
+#include "merge.h"
namespace gold
{
+void Worklist::just_push(const Section_id &val) { this->work_list_.push(val); }
+
+void Worklist::push(const Section_id &val) {
+ this->work_list_.push(val);
+ Object *obj = val.first;
+ unsigned int shndx = val.second;
+ uint64_t flags = obj->section_flags(shndx);
+ gold_assert((flags & elfcpp::SHF_MERGE) == 0);
+}
+
+static void
+add_referenced_offset(Object *obj, unsigned int shndx, uint64_t offset) {
+ // FIXME: what is the safe way to do this cast?
+ Relobj* r_obj = static_cast<Relobj*>(obj);
+ Object_merge_map* object_merge_map = r_obj->get_or_create_merge_map();
+
+ // In the case of strings, findind the start and length requires reading the
+ // contents, which requires decompression. To do that only once,
+ // we record only the reference offset here and computed the start and length
+ // do_add_input_section.
+ object_merge_map->add_reference(shndx, offset);
+}
+
+void Worklist::push(const Section_id &val, uint64_t offset) {
+ this->work_list_.push(val);
+ Object *obj = val.first;
+ unsigned int shndx = val.second;
+ uint64_t flags = obj->section_flags(shndx);
+ if ((flags & elfcpp::SHF_MERGE) == 0)
+ return;
+
+ add_referenced_offset(obj, shndx, offset);
+}
+
// Garbage collection uses a worklist style algorithm to determine the
// transitive closure of all referenced sections.
void
@@ -63,11 +98,25 @@ Garbage_collection::do_transitive_closure()
if (this->referenced_list().find(*it_v)
== this->referenced_list().end())
{
- this->worklist().push(*it_v);
+ this->worklist().just_push(*it_v);
}
}
}
this->worklist_ready();
+ for (Sections_reachable::iterator I = this->referenced_list().begin(),
+ E = this->referenced_list().end();
+ I != E; ++I) {
+ Atom_reachable &atoms = this->atom_reloc_map()[*I];
+ for (Atom_reachable::iterator I2 = atoms.begin(), E2 = atoms.end();
+ I2 != E2; ++I2) {
+ const Atom &atom = *I2;
+ const Section_id id = atom.first;
+ Object* obj = id.first;
+ unsigned int sec_num = id.second;
+ uint64_t new_offset = atom.second;
+ add_referenced_offset(obj, sec_num, new_offset);
+ }
+ }
}
} // End namespace gold.
diff --git a/gold/gc.h b/gold/gc.h
index be4a63c..9bc0a1d 100644
--- a/gold/gc.h
+++ b/gold/gc.h
@@ -46,18 +46,49 @@ class Output_section;
class General_options;
class Layout;
+class Worklist
+{
+ public:
+ bool empty() const {
+ return work_list_.empty();
+ }
+ Section_id &front() {
+ return work_list_.front();
+ }
+ void pop() {
+ work_list_.pop();
+ }
+ void just_push(const Section_id& val);
+ void push(const Section_id& val);
+ void push(const Section_id& val, uint64_t offset);
+
+ private:
+ typedef std::queue<Section_id> Worklist_type;
+ Worklist_type work_list_;
+};
+
class Garbage_collection
{
public:
typedef Unordered_set<Section_id, Section_id_hash> Sections_reachable;
typedef std::map<Section_id, Sections_reachable> Section_ref;
- typedef std::queue<Section_id> Worklist_type;
+
// This maps the name of the section which can be represented as a C
// identifier (cident) to the list of sections that have that name.
// Different object files can have cident sections with the same name.
typedef std::map<std::string, Sections_reachable> Cident_section_map;
+ typedef std::pair<Section_id, uint64_t> Atom;
+ struct Atom_hash {
+ size_t operator()(const Atom &a) const {
+ const Section_id &s = a.first;
+ return reinterpret_cast<uintptr_t>(s.first) ^ s.second ^ a.second;
+ }
+ };
+ typedef Unordered_set<Atom, Atom_hash> Atom_reachable;
+ typedef std::map<Section_id, Atom_reachable> Atom_ref;
+
Garbage_collection()
: is_worklist_ready_(false)
{ }
@@ -72,7 +103,11 @@ class Garbage_collection
section_reloc_map()
{ return this->section_reloc_map_; }
- Worklist_type&
+ Atom_ref&
+ atom_reloc_map()
+ { return this->atom_reloc_map_; }
+
+ Worklist&
worklist()
{ return this->work_list_; }
@@ -113,13 +148,26 @@ class Garbage_collection
reachable.insert(dst_id);
}
+ void add_reference_to_merge_section(Object *src_object,
+ unsigned int src_shndx,
+ Object *dst_object,
+ unsigned int dst_shndx, uint64_t offset) {
+ add_reference(src_object, src_shndx, dst_object, dst_shndx);
+ Section_id src_id(src_object, src_shndx);
+ Section_id dst_id(dst_object, dst_shndx);
+ Atom atom(dst_id, offset);
+ this->atom_reloc_map_[src_id].insert(atom);
+ }
+
private:
- Worklist_type work_list_;
+ Worklist work_list_;
bool is_worklist_ready_;
Section_ref section_reloc_map_;
Sections_reachable referenced_list_;
Cident_section_map cident_sections_;
+
+ Atom_ref atom_reloc_map_;
};
// Data to pass between successive invocations of do_layout
@@ -235,6 +283,10 @@ gc_process_relocs(
typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
Address dst_off;
+ // If the relocation points to a section, this includes the addend.
+ // Otherwise it doesn't.
+ Address gc_ref_off;
+
if (r_sym < local_count)
{
gold_assert(plocal_syms != NULL);
@@ -244,7 +296,10 @@ gc_process_relocs(
bool is_ordinary;
dst_indx = src_obj->adjust_sym_shndx(r_sym, dst_indx, &is_ordinary);
dst_obj = src_obj;
- dst_off = lsym.get_st_value() + addend;
+ gc_ref_off = dst_off = lsym.get_st_value();
+ dst_off += addend;
+ if (lsym.get_st_type() == elfcpp::STT_SECTION)
+ gc_ref_off += addend;
if (is_icf_tracked)
{
@@ -296,6 +351,7 @@ gc_process_relocs(
dst_indx = gsym->shndx(&is_ordinary);
}
dst_off = static_cast<const Sized_symbol<size>*>(gsym)->value();
+ gc_ref_off = dst_off;
dst_off += addend;
// When doing safe folding, check to see if this relocation is that
@@ -348,7 +404,14 @@ gc_process_relocs(
}
if (parameters->options().gc_sections())
{
- symtab->gc()->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+ Garbage_collection* gc = symtab->gc();
+ uint64_t dst_flags = dst_obj->section_flags(dst_indx);
+ if (dst_flags & elfcpp::SHF_MERGE)
+ gc->add_reference_to_merge_section(src_obj, src_indx, dst_obj,
+ dst_indx, gc_ref_off);
+ else
+ gc->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+
parameters->sized_target<size, big_endian>()
->gc_add_reference(symtab, src_obj, src_indx,
dst_obj, dst_indx, dst_off);
diff --git a/gold/merge.cc b/gold/merge.cc
index d395312..b8688e6 100644
--- a/gold/merge.cc
+++ b/gold/merge.cc
@@ -33,6 +33,33 @@ namespace gold
// Class Object_merge_map.
+void
+Object_merge_map::Input_merge_map::add_reference(
+ section_offset_type input_offset) {
+ this->sorted = false;
+ Input_merge_entry entry;
+ entry.input_offset = input_offset;
+ entry.length = -1;
+ entry.output_offset = -1;
+ this->entries.push_back(entry);
+}
+
+void
+Object_merge_map::Input_merge_map::ensure_sorted() {
+ if (this->sorted)
+ return;
+ std::sort(this->entries.begin(), this->entries.end(), Input_merge_compare());
+ this->sorted = true;
+}
+
+void
+Object_merge_map::Input_merge_map::unique() {
+ ensure_sorted();
+ Entries::iterator I = std::unique(this->entries.begin(), this->entries.end(),
+ Input_merge_same_offset());
+ entries.erase(I, entries.end());
+}
+
// Destructor.
Object_merge_map::~Object_merge_map()
@@ -69,7 +96,11 @@ Object_merge_map::get_or_make_input_merge_map(
{
// For a given input section in a given object, every mapping
// must be done with the same Merge_map.
- gold_assert(map->output_data == output_data);
+ // FIXME: comment
+ if (map->output_data == NULL)
+ map->output_data = output_data;
+ else
+ gold_assert(map->output_data == output_data);
return map;
}
@@ -145,6 +176,12 @@ Object_merge_map::Input_merge_map::add_mapping(
this->entries.push_back(entry);
}
+void Object_merge_map::add_reference(unsigned int shndx,
+ section_offset_type input_offset) {
+ Input_merge_map* map = this->get_or_make_input_merge_map(NULL, shndx);
+ map->add_reference(input_offset);
+}
+
// Get the output offset for an input address.
bool
@@ -156,12 +193,7 @@ Object_merge_map::get_output_offset(unsigned int shndx,
if (map == NULL)
return false;
- if (!map->sorted)
- {
- std::sort(map->entries.begin(), map->entries.end(),
- Input_merge_compare());
- map->sorted = true;
- }
+ map->ensure_sorted();
Input_merge_entry entry;
entry.input_offset = input_offset;
@@ -366,9 +398,42 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
Object_merge_map* merge_map = object->get_or_create_merge_map();
Object_merge_map::Input_merge_map* input_merge_map =
merge_map->get_or_make_input_merge_map(this, shndx);
+ uint64_t flags = object->section_flags(shndx);
+
+ bool gc = parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+ Object_merge_map::Input_merge_map::Entries &entries =
+ input_merge_map->entries;
+
+ if (gc) {
+ for (Object_merge_map::Input_merge_map::Entries::iterator
+ I = entries.begin(),
+ E = entries.end();
+ I != E; ++I) {
+ I->input_offset &= ~(entsize - 1);
+ I->length = entsize;
+ }
+
+ input_merge_map->unique();
+ }
+
+ Object_merge_map::Input_merge_map::Entries::iterator entry_i =
+ entries.begin();
+ Object_merge_map::Input_merge_map::Entries::iterator entry_e =
+ entries.end();
- for (section_size_type i = 0; i < len; i += entsize, p += entsize)
+ section_offset_type len_u = len;
+ for (section_offset_type i = 0; i < len_u; i += entsize, p += entsize)
{
+ if (gc)
+ {
+ while (entry_i != entry_e && entry_i->input_offset < i)
+ ++entry_i;
+ if (entry_i == entry_e)
+ break;
+ if (entry_i->input_offset != i)
+ continue;
+ }
+
// Add the constant to the section contents. If we find that it
// is already in the hash table, we will remove it again.
Merge_data_key k = this->len_;
@@ -385,7 +450,15 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
}
// Record the offset of this constant in the output section.
- input_merge_map->add_mapping(i, entsize, k);
+ if (gc)
+ {
+ entry_i->length = entsize;
+ entry_i->output_offset = k;
+ }
+ else
+ {
+ input_merge_map->add_mapping(i, entsize, k);
+ }
}
// For script processing, we keep the input sections.
@@ -500,6 +573,31 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
++count;
merged_strings.reserve(count + 1);
+ Object_merge_map* merge_map = object->get_or_create_merge_map();
+ Object_merge_map::Input_merge_map* input_merge_map =
+ merge_map->get_or_make_input_merge_map(this, shndx);
+
+ uint64_t flags = object->section_flags(shndx);
+ bool gc = parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+ Object_merge_map::Input_merge_map::Entries &entries =
+ input_merge_map->entries;
+
+ if (gc) {
+ for (Object_merge_map::Input_merge_map::Entries::iterator
+ I = entries.begin(),
+ E = entries.end();
+ I != E; ++I) {
+ section_offset_type start_offset = I->input_offset;
+ while (start_offset > 0 && pdata[start_offset - 1] != 0)
+ --start_offset;
+ I->input_offset = start_offset;
+
+ I->length = (string_length(&pdata[start_offset]) + 1) * sizeof(Char_type);
+ }
+
+ input_merge_map->unique();
+ }
+
// The index I is in bytes, not characters.
section_size_type i = 0;
@@ -520,10 +618,12 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
!= init_align_modulo))
has_misaligned_strings = true;
- Stringpool::Key key;
- this->stringpool_.add_with_length(p, len, true, &key);
+ if (object->is_offset_referenced(shndx, i)) {
+ Stringpool::Key key;
+ this->stringpool_.add_with_length(p, len, true, &key);
- merged_strings.push_back(Merged_string(i, key));
+ merged_strings.push_back(Merged_string(i, key));
+ }
p += len + 1;
i += (len + 1) * sizeof(Char_type);
}
@@ -569,8 +669,20 @@ Output_merge_string<Char_type>::finalize_merged_data()
section_offset_type last_output_offset = 0;
Relobj *object = (*l)->object;
Object_merge_map* merge_map = object->get_or_create_merge_map();
+ unsigned shndx = (*l)->shndx;
Object_merge_map::Input_merge_map* input_merge_map =
- merge_map->get_or_make_input_merge_map(this, (*l)->shndx);
+ merge_map->get_or_make_input_merge_map(this, shndx);
+
+ uint64_t flags = object->section_flags(shndx);
+ bool gc =
+ parameters->options().gc_sections() && (flags & elfcpp::SHF_ALLOC);
+ Object_merge_map::Input_merge_map::Entries &entries =
+ input_merge_map->entries;
+
+ Object_merge_map::Input_merge_map::Entries::iterator entry_i =
+ entries.begin();
+ Object_merge_map::Input_merge_map::Entries::iterator entry_e =
+ entries.end();
for (typename Merged_strings::const_iterator p =
(*l)->merged_strings.begin();
@@ -578,10 +690,21 @@ Output_merge_string<Char_type>::finalize_merged_data()
++p)
{
section_size_type length = p->offset - last_input_offset;
- if (length > 0)
- input_merge_map->add_mapping(last_input_offset, length,
- last_output_offset);
- last_input_offset = p->offset;
+ if (length > 0) {
+ if (gc) {
+ while (entry_i != entry_e &&
+ entry_i->input_offset < last_input_offset)
+ ++entry_i;
+ if (entry_i == entry_e)
+ break;
+ if (entry_i->input_offset == last_input_offset)
+ entry_i->output_offset = last_output_offset;
+ } else {
+ input_merge_map->add_mapping(last_input_offset, length,
+ last_output_offset);
+ }
+ }
+ last_input_offset = p->offset;
if (p->stringpool_key != 0)
last_output_offset =
this->stringpool_.get_offset_from_key(p->stringpool_key);
diff --git a/gold/merge.h b/gold/merge.h
index 54caed8..3c68a5c 100644
--- a/gold/merge.h
+++ b/gold/merge.h
@@ -59,6 +59,9 @@ class Object_merge_map
section_offset_type offset, section_size_type length,
section_offset_type output_offset);
+ void
+ add_reference(unsigned int shndx, section_offset_type offset);
+
// Get the output offset for an input address. MERGE_MAP is the map
// we are looking for, or NULL if we don't care. The input address
// is at offset OFFSET in section SHNDX. This sets *OUTPUT_OFFSET
@@ -104,6 +107,9 @@ class Object_merge_map
void add_mapping(section_offset_type input_offset, section_size_type length,
section_offset_type output_offset);
+ void
+ add_reference(section_offset_type input_offset);
+
typedef std::vector<Input_merge_entry> Entries;
// We store these with the Relobj, and we look them up by input
@@ -134,6 +140,9 @@ class Object_merge_map
Input_merge_map()
: output_data(NULL), entries(), sorted(true)
{ }
+
+ void unique();
+ void ensure_sorted();
};
// Get or make the Input_merge_map to use for the section SHNDX
@@ -151,6 +160,13 @@ class Object_merge_map
{ return i1.input_offset < i2.input_offset; }
};
+ struct Input_merge_same_offset
+ {
+ bool
+ operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
+ { return i1.input_offset == i2.input_offset; }
+ };
+
// Map input section indices to merge maps.
typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
diff --git a/gold/object.cc b/gold/object.cc
index f28305b..8a3b040 100644
--- a/gold/object.cc
+++ b/gold/object.cc
@@ -406,6 +406,23 @@ Relobj::finalize_incremental_relocs(Layout* layout, bool clear_counts)
layout->incremental_inputs()->set_reloc_count(rindex);
}
+bool Relobj::is_offset_referenced(unsigned int shndx, uint64_t offset) {
+ if (!parameters->options().gc_sections())
+ return true;
+
+ uint64_t flags = this->section_flags(shndx);
+ // We don't gc non alloc sections.
+ if ((flags & elfcpp::SHF_ALLOC) == 0)
+ return true;
+
+ // Only SHF_MERGE sections are partially gced.
+ if ((flags & elfcpp::SHF_MERGE) == 0)
+ return true;
+
+ section_offset_type output_offset;
+ return this->merge_output_offset(shndx, offset, &output_offset);
+}
+
Object_merge_map*
Relobj::get_or_create_merge_map()
{
@@ -2205,7 +2222,9 @@ Sized_relobj_file<size, big_endian>::do_count_local_symbols(Stringpool* pool,
// Decide whether this symbol should go into the output file.
- if ((shndx < shnum && out_sections[shndx] == NULL)
+ if ((shndx < shnum
+ && (out_sections[shndx] == NULL ||
+ !this->is_offset_referenced(shndx, sym.get_st_value())))
|| shndx == this->discarded_eh_frame_shndx_)
{
lv.set_no_output_symtab_entry();
@@ -2378,6 +2397,9 @@ Sized_relobj_file<size, big_endian>::compute_final_local_value_internal(
}
else if (!lv_in->is_section_symbol())
{
+ if (!this->is_offset_referenced(shndx, lv_in->input_value()))
+ return This::CFLV_DISCARDED;
+
// This is not a section symbol. We can determine
// the final value now.
lv_out->set_output_value(
diff --git a/gold/object.h b/gold/object.h
index fc93abd..1ee1e2d 100644
--- a/gold/object.h
+++ b/gold/object.h
@@ -338,6 +338,8 @@ class Object
{
public:
typedef std::vector<Symbol*> Symbols;
+ typedef std::vector<uint64_t> Offsets_reachable;
+ typedef std::map<unsigned int, Offsets_reachable> Offset_ref;
// NAME is the name of the object as we would report it to the user
// (e.g., libfoo.a(bar.o) if this is in an archive. INPUT_FILE is
@@ -1280,6 +1282,9 @@ class Relobj : public Object
is_big_endian() const
{ return this->do_is_big_endian(); }
+ bool
+ is_offset_referenced(unsigned int shndx, uint64_t offset);
+
protected:
// The output section to be used for each input section, indexed by
// the input section number. The output section is NULL if the
diff --git a/gold/symtab.cc b/gold/symtab.cc
index 045327a..435ee5d 100644
--- a/gold/symtab.cc
+++ b/gold/symtab.cc
@@ -637,8 +637,9 @@ Symbol_table::gc_mark_undef_symbols(Layout* layout)
}
}
+template<int size>
void
-Symbol_table::gc_mark_symbol(Symbol* sym)
+Symbol_table::gc_mark_sized_symbol(Sized_symbol<size>* sym)
{
// Add the object and section to the work list.
bool is_ordinary;
@@ -646,11 +647,22 @@ Symbol_table::gc_mark_symbol(Symbol* sym)
if (is_ordinary && shndx != elfcpp::SHN_UNDEF)
{
gold_assert(this->gc_!= NULL);
- this->gc_->worklist().push(Section_id(sym->object(), shndx));
+ this->gc_->worklist().push(Section_id(sym->object(), shndx),
+ sym->value());
}
parameters->target().gc_mark_symbol(this, sym);
}
+void
+Symbol_table::gc_mark_symbol(Symbol* sym)
+{
+ const int size = parameters->target().get_size();
+ if (size == 32)
+ gc_mark_sized_symbol(this->get_sized_symbol<32>(sym));
+ else
+ gc_mark_sized_symbol(this->get_sized_symbol<64>(sym));
+}
+
// When doing garbage collection, keep symbols that have been seen in
// dynamic objects.
inline void
diff --git a/gold/symtab.h b/gold/symtab.h
index 9413360..1d4a2e8 100644
--- a/gold/symtab.h
+++ b/gold/symtab.h
@@ -1385,6 +1385,10 @@ class Symbol_table
gc_mark_undef_symbols(Layout*);
// This tells garbage collection that this symbol is referenced.
+ template<int size>
+ void
+ gc_mark_sized_symbol(Sized_symbol<size>* sym);
+
void
gc_mark_symbol(Symbol* sym);