This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[gold][aarch64] Patch for erratum-835769
- From: Han Shen <shenhan at google dot com>
- To: binutils <binutils at sourceware dot org>, Cary Coutant <ccoutant at gmail dot com>
- Cc: Bhaskar <bjanakiraman at google dot com>, Luis Lozano <llozano at google dot com>, Andrew Hsieh <andrewhsieh at google dot com>, Egor Pasko <pasko at google dot com>
- Date: Thu, 11 Jun 2015 16:48:36 -0700
- Subject: [gold][aarch64] Patch for erratum-835769
- Authentication-results: sourceware.org; auth=none
Hi Cary, this is the patch for erratum 835769, which is the second of the two
errata that need to be fixed before Android land adopts gold (the other was
843419, which was fixed by my CL a few days ago).
835769 is, in nature, very similar to 843419. What I did was a) add a sequence
scanner (implemented similarly to the one in bfd) and b) define a new stub
type (the stub generation/relocation is the same).
Tested with the bfd 835769 test case.
Built on x86_64 Ubuntu and on an aarch64 machine, with both -O2 and -O0.
Ok for trunk?
2015-05-12 Han Shen <shenhan@google.com>
* aarch64.cc (AArch64_insn_utilities::BYTES_PER_INSN): Move
definition outside class definition.
(AArch64_insn_utilities::AARCH64_ZR): New static constant.
(AArch64_insn_utilities::aarch64_op31): New member.
(AArch64_insn_utilities::aarch64_ra): New member.
(AArch64_insn_utilities::aarch64_mac): New member.
(AArch64_insn_utilities::aarch64_mlxl): New member.
(ST_E_835769): New global enum member.
(Stub_table::relocate_stubs): Add 835769 handler.
(Stub_template_repertoire::Stub_template_repertoire): Install new
stub type.
(AArch64_relobj::scan_errata): This func is renamed from
scan_erratum_843419.
(AArch64_relobj::do_count_local_symbols): Add 835769 handler.
(AArch64_relobj::do_relocate_sections): Add 835769 handler.
(AArch64_relobj::scan_sections_for_stubs): Add 835769 handler.
(Target_aarch64::scan_erratum_835769_span): New method.
(Target_aarch64::create_erratum_stub): New method.
(Target_aarch64::is_erratum_835769_sequence): New method.
(Target_aarch64::scan_erratum_843419_span): Move part of the
code into create_erratum_stub.
* options.h (fix_cortex_a53_835769): New option.
-Han
diff --git a/gold/aarch64.cc b/gold/aarch64.cc
index 8dfd933..062b1ff 100644
--- a/gold/aarch64.cc
+++ b/gold/aarch64.cc
@@ -75,20 +75,35 @@ template<bool big_endian>
class AArch64_insn_utilities
{
public:
typedef typename elfcpp::Swap<32, big_endian>::Valtype Insntype;
- static const int BYTES_PER_INSN = 4;
+ static const int BYTES_PER_INSN;
+
+ // Zero register encoding - 31.
+ static const unsigned int AARCH64_ZR;
static unsigned int
aarch64_bit(Insntype insn, int pos)
{ return ((1 << pos) & insn) >> pos; }
static unsigned int
aarch64_bits(Insntype insn, int pos, int l)
{ return (insn >> pos) & ((1 << l) - 1); }
+ // Get the encoding field "op31" of 3-source data processing insns. "op31" is
+ // the name defined in armv8 insn manual C3.5.9.
+ static unsigned int
+ aarch64_op31(Insntype insn)
+ { return aarch64_bits(insn, 21, 3); }
+
+ // Get the encoding field "ra" of 3-source data processing insns. "ra" is the
+ // third source register. See armv8 insn manual C3.5.9.
+ static unsigned int
+ aarch64_ra(Insntype insn)
+ { return aarch64_bits(insn, 10, 5); }
+
static bool
is_adrp(const Insntype insn)
{ return (insn & 0x9F000000) == 0x90000000; }
static unsigned int
@@ -328,12 +343,46 @@ public:
return false;
}
return true;
}
return false;
+ } // End of "aarch64_mem_op_p".
+
+ // Return true if INSN is mac insn.
+ static bool
+ aarch64_mac(Insntype insn)
+ { return (insn & 0xff000000) == 0x9b000000; }
+
+ // Return true if INSN is multiply-accumulate.
+ // (This is similar to the implementation in elfnn-aarch64.c.)
+ static bool
+ aarch64_mlxl(Insntype insn)
+ {
+ uint32_t op31 = aarch64_op31(insn);
+ if (aarch64_mac(insn)
+ && (op31 == 0 || op31 == 1 || op31 == 5)
+ /* Exclude MUL instructions which are encoded as a multiple accumulate
+ with RA = XZR. */
+ && aarch64_ra(insn) != AARCH64_ZR)
+ {
+ return true;
+ }
+ return false;
}
-};
+}; // End of "AArch64_insn_utilities".
+
+
+// Insn length in byte.
+
+template<bool big_endian>
+const int AArch64_insn_utilities<big_endian>::BYTES_PER_INSN = 4;
+
+
+// Zero register encoding - 31.
+
+template<bool big_endian>
+const unsigned int AArch64_insn_utilities<big_endian>::AARCH64_ZR = 0x1f;
// Output_data_got_aarch64 class.
template<int size, bool big_endian>
@@ -601,12 +650,15 @@ enum
ST_LONG_BRANCH_PCREL = 3,
// Stub for erratum 843419 handling.
ST_E_843419 = 4,
+ // Stub for erratum 835769 handling.
+ ST_E_835769 = 5,
+
// Number of total stub types.
- ST_NUMBER = 5
+ ST_NUMBER = 6
};
// Struct that wraps insns for a particular stub. All stub templates are
// created/initialized as constants by Stub_template_repertoire.
@@ -693,20 +745,24 @@ Stub_template_repertoire<big_endian>::Stub_template_repertoire()
{
0x00000000, /* Placeholder for erratum insn. */
0x14000000, /* b <label> */
};
+ // ST_E_835769 has the same stub template as ST_E_843419.
+ const static Insntype* ST_E_835769_INSNS = ST_E_843419_INSNS;
+
#define install_insn_template(T) \
const static Stub_template<big_endian> template_##T = { \
T##_INSNS, sizeof(T##_INSNS) / sizeof(T##_INSNS[0]) }; \
this->stub_templates_[T] = &template_##T
install_insn_template(ST_NONE);
install_insn_template(ST_ADRP_BRANCH);
install_insn_template(ST_LONG_BRANCH_ABS);
install_insn_template(ST_LONG_BRANCH_PCREL);
install_insn_template(ST_E_843419);
+ install_insn_template(ST_E_835769);
#undef install_insn_template
}
@@ -951,13 +1007,13 @@ Erratum_stub<size, big_endian>::do_write(unsigned char* view, section_size_type)
{
typedef typename elfcpp::Swap<32, big_endian>::Valtype Insntype;
const Insntype* insns = this->insns();
uint32_t num_insns = this->insn_num();
Insntype* ip = reinterpret_cast<Insntype*>(view);
- // For current implemnted erratum 843419, (and 835769 which is to be
- // implemented soon), the first insn in the stub is always a copy of the
- // problematic insn (in 843419, the mem access insn), followed by a jump-back.
+ // For the currently implemented errata 843419 and 835769, the first insn in
+ // the stub is always a copy of the problematic insn (in 843419, the mem
+ // access insn; in 835769, the mac insn), followed by a jump-back.
elfcpp::Swap<32, big_endian>::writeval(ip, this->erratum_insn());
for (uint32_t i = 1; i < num_insns; ++i)
elfcpp::Swap<32, big_endian>::writeval(ip + i, insns[i]);
}
@@ -1459,10 +1515,11 @@ relocate_stubs(const The_relocate_info* relinfo,
// Branch offset that is to be filled in "b" insn.
int b_offset = 0;
switch ((*i)->type())
{
case ST_E_843419:
+ case ST_E_835769:
// For the erratum, the 2nd insn is a b-insn to be patched
// (relocated).
stub_b_insn_address = stub_address + 1 * BPI;
b_offset = (*i)->destination_address() - stub_b_insn_address;
AArch64_relocate_functions<size, big_endian>::construct_b(
@@ -1578,16 +1635,16 @@ class AArch64_relobj : public Sized_relobj_file<size, big_endian>
{
gold_assert(shndx < this->stub_tables_.size());
this->stub_tables_[shndx] = stub_table;
}
- // Entrance to erratum_843419 scanning.
+ // Entrance to errata scanning.
void
- scan_erratum_843419(unsigned int shndx,
- const elfcpp::Shdr<size, big_endian>&,
- Output_section*, const Symbol_table*,
- The_target_aarch64*);
+ scan_errata(unsigned int shndx,
+ const elfcpp::Shdr<size, big_endian>&,
+ Output_section*, const Symbol_table*,
+ The_target_aarch64*);
// Scan all relocation sections for stub generation.
void
scan_sections_for_stubs(The_target_aarch64*, const Symbol_table*,
const Layout*);
@@ -1680,11 +1737,12 @@ AArch64_relobj<size, big_endian>::do_count_local_symbols(
{
Sized_relobj_file<size, big_endian>::do_count_local_symbols(pool, dynpool);
// Only erratum-fixing work needs mapping symbols, so skip this time consuming
// processing if not fixing erratum.
- if (!parameters->options().fix_cortex_a53_843419())
+ if (!parameters->options().fix_cortex_a53_843419()
+ && !parameters->options().fix_cortex_a53_835769())
return;
const unsigned int loccount = this->local_symbol_count();
if (loccount == 0)
return;
@@ -1810,11 +1868,12 @@ AArch64_relobj<size, big_endian>::do_relocate_sections(
// We do not generate stubs if doing a relocatable link.
if (parameters->options().relocatable())
return;
- if (parameters->options().fix_cortex_a53_843419())
+ if (parameters->options().fix_cortex_a53_843419()
+ || parameters->options().fix_cortex_a53_835769())
this->fix_errata(pviews);
Relocate_info<size, big_endian> relinfo;
relinfo.symtab = symtab;
relinfo.layout = layout;
@@ -1936,15 +1995,15 @@ AArch64_relobj<size, big_endian>::section_needs_reloc_stub_scanning(
return this->text_section_is_scannable(text_shdr, text_shndx,
out_sections[text_shndx], symtab);
}
-// Scan section SHNDX for erratum 843419.
+// Scan section SHNDX for erratum 843419 and 835769.
template<int size, bool big_endian>
void
-AArch64_relobj<size, big_endian>::scan_erratum_843419(
+AArch64_relobj<size, big_endian>::scan_errata(
unsigned int shndx, const elfcpp::Shdr<size, big_endian>& shdr,
Output_section* os, const Symbol_table* symtab,
The_target_aarch64* target)
{
if (shdr.get_sh_size() == 0
@@ -1992,11 +2051,22 @@ AArch64_relobj<size, big_endian>::scan_erratum_843419(
if (p != this->mapping_symbol_info_.end()
&& p->first.shndx_ == shndx)
span_end = convert_to_section_size_type(p->first.offset_);
else
span_end = convert_to_section_size_type(shdr.get_sh_size());
- target->scan_erratum_843419_span(
+
+ // Here we do not share the scanning code of both errata. For 843419,
+ // only the last few insns of each page are examined, which is fast,
+ // whereas, for 835769, every insn pair needs to be checked.
+
+ if (parameters->options().fix_cortex_a53_843419())
+ target->scan_erratum_843419_span(
+ this, shndx, span_start, span_end,
+ const_cast<unsigned char*>(input_view), output_address);
+
+ if (parameters->options().fix_cortex_a53_835769())
+ target->scan_erratum_835769_span(
this, shndx, span_start, span_end,
const_cast<unsigned char*>(input_view), output_address);
}
}
}
@@ -2033,12 +2103,13 @@ AArch64_relobj<size, big_endian>::scan_sections_for_stubs(
// Do relocation stubs scanning.
const unsigned char* p = pshdrs + shdr_size;
for (unsigned int i = 1; i < shnum; ++i, p += shdr_size)
{
const elfcpp::Shdr<size, big_endian> shdr(p);
- if (parameters->options().fix_cortex_a53_843419())
- scan_erratum_843419(i, shdr, out_sections[i], symtab, target);
+ if (parameters->options().fix_cortex_a53_843419()
+ || parameters->options().fix_cortex_a53_835769())
+ scan_errata(i, shdr, out_sections[i], symtab, target);
if (this->section_needs_reloc_stub_scanning(shdr, out_sections, symtab,
pshdrs))
{
unsigned int index = this->adjust_shndx(shdr.get_sh_info());
AArch64_address output_offset =
@@ -2734,15 +2805,25 @@ class Target_aarch64 : public Sized_target<size, big_endian>
&& parameters->target().is_big_endian() == big_endian);
return static_cast<This*>(parameters->sized_target<size, big_endian>());
}
- // Scan erratum for a part of a section.
+ // Scan erratum 843419 for a part of a section.
void
scan_erratum_843419_span(
AArch64_relobj<size, big_endian>*,
- unsigned int shndx,
+ unsigned int,
+ const section_size_type,
+ const section_size_type,
+ unsigned char*,
+ Address);
+
+ // Scan erratum 835769 for a part of a section.
+ void
+ scan_erratum_835769_span(
+ AArch64_relobj<size, big_endian>*,
+ unsigned int,
const section_size_type,
const section_size_type,
unsigned char*,
Address);
@@ -3037,16 +3118,31 @@ class Target_aarch64 : public Sized_target<size, big_endian>
{
gold_assert(this->plt_ != NULL);
return this->plt_;
}
+ // Helper method to create erratum stubs for ST_E_843419 and ST_E_835769.
+ void create_erratum_stub(
+ AArch64_relobj<size, big_endian>* relobj,
+ unsigned int shndx,
+ section_size_type erratum_insn_offset,
+ Address erratum_address,
+ typename Insn_utilities::Insntype erratum_insn,
+ int erratum_type);
+
// Return whether this is a 3-insn erratum sequence.
bool is_erratum_843419_sequence(
typename elfcpp::Swap<32,big_endian>::Valtype insn1,
typename elfcpp::Swap<32,big_endian>::Valtype insn2,
typename elfcpp::Swap<32,big_endian>::Valtype insn3);
+ // Return whether this is a 835769 sequence.
+ // (Similarly implemented as in elfnn-aarch64.c.)
+ bool is_erratum_835769_sequence(
+ typename elfcpp::Swap<32,big_endian>::Valtype,
+ typename elfcpp::Swap<32,big_endian>::Valtype);
+
// Get the dynamic reloc section, creating it if necessary.
Reloc_section*
rela_dyn_section(Layout*);
// Get the section to use for TLSDESC relocations.
@@ -7671,10 +7767,140 @@ Target_aarch64<size, big_endian>::is_erratum_843419_sequence(
}
return false;
}
+// Return whether this is a 835769 sequence.
+// (Similarly implemented as in elfnn-aarch64.c.)
+
+template<int size, bool big_endian>
+bool
+Target_aarch64<size, big_endian>::is_erratum_835769_sequence(
+ typename elfcpp::Swap<32,big_endian>::Valtype insn1,
+ typename elfcpp::Swap<32,big_endian>::Valtype insn2)
+{
+ uint32_t rt;
+ uint32_t rt2;
+ uint32_t rn;
+ uint32_t rm;
+ uint32_t ra;
+ bool pair;
+ bool load;
+
+ if (Insn_utilities::aarch64_mlxl(insn2)
+ && Insn_utilities::aarch64_mem_op_p (insn1, &rt, &rt2, &pair, &load))
+ {
+ /* Any SIMD memory op is independent of the subsequent MLA
+ by definition of the erratum. */
+ if (Insn_utilities::aarch64_bit(insn1, 26))
+ return true;
+
+ /* If not SIMD, check for integer memory ops and MLA relationship. */
+ rn = Insn_utilities::aarch64_rn(insn2);
+ ra = Insn_utilities::aarch64_ra(insn2);
+ rm = Insn_utilities::aarch64_rm(insn2);
+
+ /* If this is a load and there's a true(RAW) dependency, we are safe
+ and this is not an erratum sequence. */
+ if (load &&
+ (rt == rn || rt == rm || rt == ra
+ || (pair && (rt2 == rn || rt2 == rm || rt2 == ra))))
+ return false;
+
+ /* We conservatively put out stubs for all other cases (including
+ writebacks). */
+ return true;
+ }
+
+ return false;
+}
+
+
+// Helper method to create erratum stub for ST_E_843419 and ST_E_835769.
+
+template<int size, bool big_endian>
+void
+Target_aarch64<size, big_endian>::create_erratum_stub(
+ AArch64_relobj<size, big_endian>* relobj,
+ unsigned int shndx,
+ section_size_type erratum_insn_offset,
+ Address erratum_address,
+ typename Insn_utilities::Insntype erratum_insn,
+ int erratum_type)
+{
+ gold_assert(erratum_type == ST_E_843419 || erratum_type == ST_E_835769);
+ The_stub_table* stub_table = relobj->stub_table(shndx);
+ gold_assert(stub_table != NULL);
+ if (stub_table->find_erratum_stub(relobj,
+ shndx,
+ erratum_insn_offset) == NULL)
+ {
+ const int BPI = AArch64_insn_utilities<big_endian>::BYTES_PER_INSN;
+ The_erratum_stub* stub = new The_erratum_stub(
+ relobj, erratum_type, shndx, erratum_insn_offset);
+ stub->set_erratum_insn(erratum_insn);
+ stub->set_erratum_address(erratum_address);
+ // For erratum ST_E_843419 and ST_E_835769, the destination address is
+ // always the next insn after erratum insn.
+ stub->set_destination_address(erratum_address + BPI);
+ stub_table->add_erratum_stub(stub);
+ }
+}
+
+
+// Scan erratum for section SHNDX range [output_address + span_start,
+// output_address + span_end). Note here we do not share the code with
+// scan_erratum_843419_span function, because for 843419 we optimize by only
+// scanning the last few insns of a page, whereas for 835769, we need to scan
+// every insn.
+
+template<int size, bool big_endian>
+void
+Target_aarch64<size, big_endian>::scan_erratum_835769_span(
+ AArch64_relobj<size, big_endian>* relobj,
+ unsigned int shndx,
+ const section_size_type span_start,
+ const section_size_type span_end,
+ unsigned char* input_view,
+ Address output_address)
+{
+ typedef typename Insn_utilities::Insntype Insntype;
+
+ const int BPI = AArch64_insn_utilities<big_endian>::BYTES_PER_INSN;
+
+ // Adjust output_address and view to the start of span.
+ output_address += span_start;
+ input_view += span_start;
+
+ section_size_type span_length = span_end - span_start;
+ section_size_type offset = 0;
+ for (offset = 0; offset + BPI < span_length; offset += BPI)
+ {
+ Insntype* ip = reinterpret_cast<Insntype*>(input_view + offset);
+ Insntype insn1 = ip[0];
+ Insntype insn2 = ip[1];
+ if (is_erratum_835769_sequence(insn1, insn2))
+ {
+ Insntype erratum_insn = insn2;
+ // "span_start + offset" is the offset for insn1. So for insn2, it is
+ // "span_start + offset + BPI".
+ section_size_type erratum_insn_offset = span_start + offset + BPI;
+ Address erratum_address = output_address + offset + BPI;
+ gold_warning(_("Erratum 835769 found and fixed at \"%s\", "
+ "section %d, offset 0x%08x."),
+ relobj->name().c_str(), shndx,
+ (unsigned int)(span_start + offset));
+
+ this->create_erratum_stub(relobj, shndx,
+ erratum_insn_offset, erratum_address,
+ erratum_insn, ST_E_835769);
+ offset += BPI; // Skip mac insn.
+ }
+ }
+} // End of "Target_aarch64::scan_erratum_835769_span".
+
+
// Scan erratum for section SHNDX range
// [output_address + span_start, output_address + span_end).
template<int size, bool big_endian>
void
@@ -7747,32 +7973,17 @@ Target_aarch64<size, big_endian>::scan_erratum_843419_span(
{
gold_warning(_("Erratum 843419 found and fixed at \"%s\", "
"section %d, offset 0x%08x."),
relobj->name().c_str(), shndx,
(unsigned int)(span_start + offset));
- unsigned int errata_insn_offset =
+ unsigned int erratum_insn_offset =
span_start + offset + insn_offset;
- The_stub_table* stub_table = relobj->stub_table(shndx);
- gold_assert(stub_table != NULL);
- if (stub_table->find_erratum_stub(relobj,
- shndx,
- errata_insn_offset) == NULL)
- {
- The_erratum_stub* stub = new The_erratum_stub(
- relobj, ST_E_843419, shndx,
- errata_insn_offset);
- Address erratum_address =
- output_address + offset + insn_offset;
- // Stub destination address is the next insn after the
- // erratum.
- Address dest_address = erratum_address
- + Insn_utilities::BYTES_PER_INSN;
- stub->set_erratum_insn(erratum_insn);
- stub->set_erratum_address(erratum_address);
- stub->set_destination_address(dest_address);
- stub_table->add_erratum_stub(stub);
- }
+ Address erratum_address =
+ output_address + offset + insn_offset;
+ create_erratum_stub(relobj, shndx,
+ erratum_insn_offset, erratum_address,
+ erratum_insn, ST_E_843419);
}
}
// Advance to next candidate instruction. We only consider instruction
// sequences starting at a page offset of 0xff8 or 0xffc.
diff --git a/gold/options.h b/gold/options.h
index 658ad42..6b05c26 100644
--- a/gold/options.h
+++ b/gold/options.h
@@ -803,14 +803,19 @@ class General_options
DEFINE_bool(fix_cortex_a8, options::TWO_DASHES, '\0', false,
N_("(ARM only) Fix binaries for Cortex-A8 erratum."),
N_("(ARM only) Do not fix binaries for Cortex-A8 erratum."));
DEFINE_bool(fix_cortex_a53_843419, options::TWO_DASHES, '\0', false,
- N_("(AArch64 only) Scan binaries for Cortex-A53 errata 843419."),
- N_("(AArch64 only) Do not scan binaries for Cortex-A53 "
+ N_("(AArch64 only) Fix binaries for Cortex-A53 errata 843419."),
+ N_("(AArch64 only) Do not fix binaries for Cortex-A53 "
"errata 843419."));
+ DEFINE_bool(fix_cortex_a53_835769, options::TWO_DASHES, '\0', false,
+ N_("(AArch64 only) Fix binaries for Cortex-A53 errata 835769."),
+ N_("(AArch64 only) Do not fix binaries for Cortex-A53 "
+ "errata 835769."));
+
DEFINE_bool(fix_arm1176, options::TWO_DASHES, '\0', true,
N_("(ARM only) Fix binaries for ARM1176 erratum."),
N_("(ARM only) Do not fix binaries for ARM1176 erratum."));
DEFINE_bool(merge_exidx_entries, options::TWO_DASHES, '\0', true,