This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[1/2] RFC: add a DWARF assembler


This patch is preparation for the test case in the next patch.

A while back I wrote a DWARF assembler in Tcl.  It provides a reasonably
easy way to write custom DWARF in a platform-independent way.  It is
somewhat higher level than just using the assembler.

I wanted to quickly turn a test case from the bug in question into
something suitable for the test suite, and I thought this was simpler
than trying to build a new clang so I could rebuild the test case with
the right options, etc.

Let me know what you think.

Tom

	* lib/dwarf.exp (namespace Dwarf): New.
---
 gdb/testsuite/lib/dwarf.exp |  728 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 728 insertions(+), 0 deletions(-)

diff --git a/gdb/testsuite/lib/dwarf.exp b/gdb/testsuite/lib/dwarf.exp
index 9028714..28e8e26 100644
--- a/gdb/testsuite/lib/dwarf.exp
+++ b/gdb/testsuite/lib/dwarf.exp
@@ -28,3 +28,731 @@ proc dwarf2_support {} {
 
     return 0
 }
+
+# A DWARF assembler.
+#
+# All the variables in this namespace are private to the
+# implementation.  Also, any procedure whose name starts with "_" is
+# private as well.  Do not use these.
+#
+# Exported functions are documented at their definition.
+#
+# In addition to the hand-written functions documented below, this
+# module automatically generates a function for each DWARF tag.  For
+# most tags, two forms are made: a full name, and one with the
+# "DW_TAG_" prefix stripped.  For example, you can use either
+# 'DW_TAG_compile_unit' or 'compile_unit' interchangeably.
+#
+# There are two exceptions to this rule: DW_TAG_variable and
+# DW_TAG_namespace.  For these, the full name must always be used,
+# as the short name conflicts with Tcl builtins.  (Should future
+# versions of Tcl or DWARF add more conflicts, this list will grow.
+# If you want to be safe you should always use the full names.)
+#
+# Each tag procedure is defined like:
+#
+# proc DW_TAG_mumble {{attrs {}} {children {}}} { ... }
+#
+# ATTRS is an optional list of attributes.
+# It is run through 'subst' in the caller's context before processing.
+#
+# Each attribute in the list has one of two forms:
+#   1. { NAME VALUE }
+#   2. { NAME VALUE FORM }
+#
+# In each case, NAME is the attribute's name.
+# This can either be the full name, like 'DW_AT_name', or a shortened
+# name, like 'name'.  These are fully equivalent.
+#
+# If FORM is given, it should name a DW_FORM_ constant.
+# This can either be the short form, like 'DW_FORM_addr', or a
+# shortened version, like 'addr'.  If the form is given, VALUE
+# is its value; see below.  In some cases, additional processing
+# is done; for example, DW_FORM_strp manages the .debug_str
+# section automatically.
+#
+# If FORM is 'SPECIAL_expr', then VALUE is treated as a location
+# expression.  The effective form is then DW_FORM_block, and VALUE
+# is passed to the (internal) '_location' proc to be translated.
+# This proc implements a miniature DW_OP_ assembler.
+#
+# If FORM is not given, it is guessed:
+# * If VALUE starts with the "@" character, the rest of VALUE is
+#   looked up as a DWARF constant, and DW_FORM_sdata is used.  For
+#   example, '@DW_LANG_c89' could be used.
+# * If VALUE starts with the ":" character, then it is a label
+#   reference.  The rest of VALUE is taken to be the name of a label,
+#   and DW_FORM_ref4 is used.  See 'new_label' and 'define_label'.
+# * Otherwise, VALUE is taken to be a string and DW_FORM_string is
+#   used.
+# More form-guessing functionality may be added.
+#
+# CHILDREN is just Tcl code that can be used to define child DIEs.  It
+# is evaluated in the caller's context.
+#
+# Currently this code is missing nice support for CFA handling, and
+# probably other things as well.
+
+namespace eval Dwarf {
+    # True if the module has been initialized.
+    variable _initialized 0
+
+    # Constants from dwarf2.h.
+    variable _constants
+    # DW_AT short names.
+    variable _AT
+    # DW_FORM short names.
+    variable _FORM
+    # DW_OP short names.
+    variable _OP
+
+    # The current output file.
+    variable _output_file
+
+    # Current CU count.
+    variable _cu_count
+
+    # The current CU's base label.
+    variable _cu_label
+
+    # The current CU's version.
+    variable _cu_version
+
+    # The current CU's address size.
+    variable _cu_addr_size
+    # The current CU's offset size.
+    variable _cu_offset_size
+
+    # Label generation number.
+    variable _label_num
+
+    # The deferred output array.  The index is the section name; the
+    # contents hold the data for that section.
+    variable _deferred_output
+
+    # If empty, we should write directly to the output file.
+    # Otherwise, this is the name of a section to write to.
+    variable _defer
+
+    # The next available abbrev number in the current CU's abbrev
+    # table.
+    variable _abbrev_num
+
+    # The string table for this assembly.  The key is the string; the
+    # value is the label for that string.
+    variable _strings
+
+    proc _process_one_constant {name value} {
+	variable _constants
+	variable _AT
+	variable _FORM
+	variable _OP
+
+	set _constants($name) $value
+
+	if {![regexp "^DW_(\[A-Z\]+)_(\[A-Za-z0-9_\]+)$" $name \
+		  ignore prefix name2]} {
+	    error "non-matching name: $name"
+	}
+
+	if {$name2 == "lo_user" || $name2 == "hi_user"} {
+	    return
+	}
+
+	# We only try to shorten some very common things.
+	# FIXME: CFA?
+	switch -exact -- $prefix {
+	    TAG {
+		# Create two procedures for the tag.  These call
+		# _handle_DW_TAG with the full tag name baked in; this
+		# does all the actual work.
+		proc $name {{attrs {}} {children {}}} \
+		    "_handle_DW_TAG $name \$attrs \$children"
+
+		# Filter out ones that are known to clash.
+		if {$name2 == "variable" || $name2 == "namespace"} {
+		    set name2 "tag_$name2"
+		}
+
+		if {[info commands $name2] != {}} {
+		    error "duplicate proc name: from $name"
+		}
+
+		proc $name2 {{attrs {}} {children {}}} \
+		    "_handle_DW_TAG $name \$attrs \$children"
+	    }
+
+	    AT {
+		set _AT($name2) $name
+	    }
+
+	    FORM {
+		set _FORM($name2) $name
+	    }
+
+	    OP {
+		set _OP($name2) $name
+	    }
+
+	    default {
+		return
+	    }
+	}
+    }
+
+    proc _read_constants {} {
+	global srcdir hex decimal
+	variable _constants
+
+	# DWARF name-matching regexp.
+	set dwrx "DW_\[a-zA-Z0-9_\]+"
+	# Whitespace regexp.
+	set ws "\[ \t\]+"
+
+	set fd [open [file join $srcdir .. .. include dwarf2.h]]
+	while {![eof $fd]} {
+	    set line [gets $fd]
+	    if {[regexp -- "^${ws}($dwrx)${ws}=${ws}($hex|$decimal),?$" \
+		     $line ignore name value ignore2]} {
+		_process_one_constant $name $value
+	    }
+	}
+	close $fd
+
+	set fd [open [file join $srcdir .. .. include dwarf2.def]]
+	while {![eof $fd]} {
+	    set line [gets $fd]
+	    if {[regexp -- \
+		     "^DW_\[A-Z_\]+${ws}\\(($dwrx),${ws}($hex|$decimal)\\)$" \
+		     $line ignore name value ignore2]} {
+		_process_one_constant $name $value
+	    }
+	}
+	close $fd
+
+	set _constants(SPECIAL_expr) $_constants(DW_FORM_block)
+    }
+
+    proc _quote {string} {
+	# FIXME
+	return "\"${string}\\0\""
+    }
+
+    proc _handle_DW_FORM {form value} {
+	switch -exact -- $form {
+	    DW_FORM_string  {
+		_op .ascii [_quote $value]
+	    }
+
+	    DW_FORM_flag_present {
+		# We don't need to emit anything.
+	    }
+
+	    DW_FORM_data4 -
+	    DW_FORM_ref4 {
+		_op .4byte $value
+	    }
+
+	    DW_FORM_ref_addr {
+		variable _cu_offset_size
+		variable _cu_version
+		variable _cu_addr_size
+
+		if {$_cu_version == 2} {
+		    set size $_cu_addr_size
+		} else {
+		    set size $_cu_offset_size
+		}
+
+		_op .${size}byte $value
+	    }
+
+	    DW_FORM_ref1 -
+	    DW_FORM_flag -
+	    DW_FORM_data1 {
+		_op .byte $value
+	    }
+
+	    DW_FORM_sdata {
+		_op .sleb128 $value
+	    }
+
+	    DW_FORM_ref_udata -
+	    DW_FORM_udata {
+		_op .uleb128 $value
+	    }
+
+	    DW_FORM_addr {
+		variable _cu_addr_size
+
+		_op .${_cu_addr_size}byte $value
+	    }
+
+	    DW_FORM_data2 -
+	    DW_FORM_ref2 {
+		_op .2byte $value
+	    }
+
+	    DW_FORM_data8 -
+	    DW_FORM_ref8 -
+	    DW_FORM_ref_sig8 {
+		_op .8byte $value
+	    }
+
+	    DW_FORM_strp {
+		variable _strings
+		variable _cu_offset_size
+
+		if {![info exists _strings($value)]} {
+		    set _strings($value) [new_label strp]
+		    _defer_output .debug_string {
+			define_label $_strings($value)
+			_op .ascii [_quote $value]
+		    }
+		}
+
+		_op .${_cu_offset_size}byte $_strings($value) "strp: $value"
+	    }
+
+	    SPECIAL_expr {
+		set l1 [new_label "expr_start"]
+		set l2 [new_label "expr_end"]
+		_op .uleb128 "$l2 - $l1" "expression"
+		define_label $l1
+		_location $value
+		define_label $l2
+	    }
+
+	    DW_FORM_block2 -
+	    DW_FORM_block4 -
+
+	    DW_FORM_block -
+	    DW_FORM_block1 -
+
+	    DW_FORM_ref2 -
+	    DW_FORM_indirect -
+	    DW_FORM_sec_offset -
+	    DW_FORM_exprloc -
+
+	    DW_FORM_GNU_addr_index -
+	    DW_FORM_GNU_str_index -
+	    DW_FORM_GNU_ref_alt -
+	    DW_FORM_GNU_strp_alt -
+
+	    default {
+		error "unhandled form $form"
+	    }
+	}
+    }
+
+    proc _guess_form {value varname} {
+	upvar $varname new_value
+
+	switch -exact -- [string range $value 0 0] {
+	    @ {
+		# Constant reference.
+		variable _constants
+
+		set new_value $_constants([string range $value 1 end])
+		# Just the simplest.
+		return DW_FORM_sdata
+	    }
+
+	    : {
+		# Label reference.
+		variable _cu_label
+
+		set new_value "[string range $value 1 end] - $_cu_label"
+
+		return DW_FORM_ref4
+	    }
+
+	    default {
+		return DW_FORM_string
+	    }
+	}
+    }
+
+    # Map NAME to its canonical form.
+    proc _map_name {name ary} {
+	variable $ary
+
+	if {[info exists ${ary}($name)]} {
+	    set name [set ${ary}($name)]
+	}
+
+	return $name
+    }
+
+    proc _handle_DW_TAG {tag_name {attrs {}} {children {}}} {
+	variable _abbrev_num
+	variable _constants
+
+	set has_children [expr {[string length $children] > 0}]
+	set my_abbrev [incr _abbrev_num]
+
+	# We somewhat wastefully emit a new abbrev entry for each tag.
+	# There's no reason for this other than laziness.
+	_defer_output .debug_abbrev {
+	    _op .uleb128 $my_abbrev "Abbrev start"
+	    _op .uleb128 $_constants($tag_name) $tag_name
+	    _op .byte $has_children "has_children"
+	}
+
+	_op .uleb128 $my_abbrev "Abbrev ($tag_name)"
+
+	foreach attr $attrs {
+	    set attr_name [_map_name [lindex $attr 0] _AT]
+	    set attr_value [uplevel 2 [list subst [lindex $attr 1]]]
+	    if {[llength $attr] > 2} {
+		set attr_form [lindex $attr 2]
+	    } else {
+		set attr_form [_guess_form $attr_value attr_value]
+	    }
+	    set attr_form [_map_name $attr_form _FORM]
+
+	    _handle_DW_FORM $attr_form $attr_value
+
+	    _defer_output .debug_abbrev {
+		_op .uleb128 $_constants($attr_name) $attr_name
+		_op .uleb128 $_constants($attr_form) $attr_form
+	    }
+	}
+
+	_defer_output .debug_abbrev {
+	    # Terminator.
+	    _op .byte 0x0 Terminator
+	    _op .byte 0x0 Terminator
+	}
+
+	if {$has_children} {
+	    uplevel 2 $children
+
+	    # Terminate children.
+	    _op .byte 0x0 "Terminate children"
+	}
+    }
+
+    proc _emit {string} {
+	variable _output_file
+	variable _defer
+	variable _deferred_output
+
+	if {$_defer == ""} {
+	    puts $_output_file $string
+	} else {
+	    append _deferred_output($_defer) ${string}\n
+	}
+    }
+
+    proc _section {name} {
+	_emit "        .section $name"
+    }
+
+    proc _defer_output {section body} {
+	variable _defer
+	variable _deferred_output
+
+	set old_defer $_defer
+	set _defer $section
+
+	if {![info exists _deferred_output($_defer)]} {
+	    set _deferred_output($_defer) ""
+	    _section $section
+	}
+
+	uplevel $body
+
+	set _defer $old_defer
+    }
+
+    proc _defer_to_string {body} {
+	variable _defer
+	variable _deferred_output
+
+	set old_defer $_defer
+	set _defer temp
+
+	set _deferred_output($_defer) ""
+
+	uplevel $body
+
+	set result $_deferred_output($_defer)
+	unset _deferred_output($_defer)
+
+	set _defer $old_defer
+	return $result
+    }
+
+    proc _write_deferred_output {} {
+	variable _output_file
+	variable _deferred_output
+
+	foreach section [array names _deferred_output] {
+	    # The data already has a newline.
+	    puts -nonewline $_output_file $_deferred_output($section)
+	}
+
+	# Save some memory.
+	unset _deferred_output
+    }
+
+    proc _op {name value {comment ""}} {
+	set text "        ${name}        ${value}"
+	if {$comment != ""} {
+	    # Try to make stuff line up nicely.
+	    while {[string length $text] < 40} {
+		append text " "
+	    }
+	    append text "/* ${comment} */"
+	}
+	_emit $text
+    }
+
+    proc _compute_label {name} {
+	return ".L${name}"
+    }
+
+    # Return a name suitable for use as a label.  If BASE_NAME is
+    # specified, it is incorporated into the label name; this is to
+    # make debugging the generated assembler easier.  If BASE_NAME is
+    # not specified a generic default is used.  This proc does not
+    # define the label; see 'define_label'.  'new_label' attempts to
+    # ensure that label names are unique.
+    proc new_label {{base_name label}} {
+	variable _label_num
+
+	return [_compute_label ${base_name}[incr _label_num]]
+    }
+
+    # Define a label named NAME.  Ordinarily, NAME comes from a call
+    # to 'new_label', but this is not required.
+    proc define_label {name} {
+	_emit "${name}:"
+    }
+
+    # Declare a global label.  This is typically used to refer to
+    # labels defined in other files, for example a function defined in
+    # a .c file.
+    proc extern {args} {
+	foreach name $args {
+	    _op .global $name
+	}
+    }
+
+    # A higher-level interface to label handling.
+    #
+    # ARGS is a list of label descriptors.  Each one is either a
+    # single element, or a list of two elements -- a name and some
+    # text.  For each descriptor, 'new_label' is invoked.  If the list
+    # form is used, the second element in the list is passed as an
+    # argument.  The label name is used to define a variable in the
+    # enclosing scope; this can be used to refer to the label later.
+    # The label name is also used to define a new proc whose name is
+    # the label name plus a trailing ":".  This proc takes a body as
+    # an argument and can be used to define the label at that point;
+    # then the body, if any, is evaluated in the caller's context.
+    #
+    # For example:
+    #
+    # declare_labels int_label
+    # something { ... $int_label }   ;# refer to the label
+    # int_label: constant { ... }    ;# define the label
+    proc declare_labels {args} {
+	foreach arg $args {
+	    set name [lindex $arg 0]
+	    set text [lindex $arg 1]
+
+	    upvar $name label_var
+	    if {$text == ""} {
+		set label_var [new_label]
+	    } else {
+		set label_var [new_label $text]
+	    }
+
+	    proc ${name}: {args} [format {
+		define_label %s
+		uplevel $args
+	    } $label_var]
+	}
+    }
+
+    # This is a miniature assembler for location expressions.  It is
+    # suitable for use in the attributes to a DIE.  Its output is
+    # prefixed with "=" to make it automatically use DW_FORM_block.
+    # BODY is split by lines, and each line is taken to be a list.
+    # (FIXME should use 'info complete' here.)
+    # Each list's first element is the opcode, either short or long
+    # forms are accepted.
+    # FIXME argument handling
+    # FIXME move docs
+    proc _location {body} {
+	variable _constants
+
+	foreach line [split $body \n] {
+	    if {[lindex $line 0] == ""} {
+		continue
+	    }
+	    set opcode [_map_name [lindex $line 0] _OP]
+	    _op .byte $_constants($opcode) $opcode
+
+	    switch -exact -- $opcode {
+		DW_OP_addr {
+		    variable _cu_addr_size
+
+		    _op .${_cu_addr_size}byte [lindex $line 1]
+		}
+
+		DW_OP_const1u -
+		DW_OP_const1s {
+		    _op .byte [lindex $line 1]
+		}
+
+		DW_OP_const2u -
+		DW_OP_const2s {
+		    _op .2byte [lindex $line 1]
+		}
+
+		DW_OP_const4u -
+		DW_OP_const4s {
+		    _op .4byte [lindex $line 1]
+		}
+
+		DW_OP_const8u -
+		DW_OP_const8s {
+		    _op .8byte [lindex $line 1]
+		}
+
+		DW_OP_constu {
+		    _op .uleb128 [lindex $line 1]
+		}
+		DW_OP_consts {
+		    _op .sleb128 [lindex $line 1]
+		}
+
+		default {
+		    if {[llength $line] > 1} {
+			error "Unimplemented: operands in location for $opcode"
+		    }
+		}
+	    }
+	}
+    }
+
+    # Emit a DWARF CU.
+    # IS_64 is a boolean which is true if you want to emit 64-bit
+    # DWARF, and false for 32-bit DWARF.
+    # VERSION is the DWARF version number to emit.
+    # ADDR_SIZE is the size of addresses in bytes.
+    # BODY is Tcl code that emits the DIEs which make up the body of
+    # the CU.  It is evaluated in the caller's context.
+    proc cu {is_64 version addr_size body} {
+	variable _cu_count
+	variable _abbrev_num
+	variable _cu_label
+	variable _cu_version
+	variable _cu_addr_size
+	variable _cu_offset_size
+
+	set _cu_version $version
+	if {$is_64} {
+	    set _cu_offset_size 8
+	} else {
+	    set _cu_offset_size 4
+	}
+	set _cu_addr_size $addr_size
+
+	_section .debug_info
+
+	set cu_num [incr _cu_count]
+	set my_abbrevs [_compute_label "abbrev${cu_num}_begin"]
+	set _abbrev_num 1
+
+	set _cu_label [_compute_label "cu${cu_num}_begin"]
+	set start_label [_compute_label "cu${cu_num}_start"]
+	set end_label [_compute_label "cu${cu_num}_end"]
+	
+	define_label $_cu_label
+	if {$is_64} {
+	    _op .4byte 0xffffffff
+	    _op .8byte "$end_label - $start_label"
+	} else {
+	    _op .4byte "$end_label - $start_label"
+	}
+	define_label $start_label
+	_op .2byte $version Version
+	_op .4byte $my_abbrevs Abbrevs
+	_op .byte $addr_size "Pointer size"
+
+	_defer_output .debug_abbrev {
+	    define_label $my_abbrevs
+	}
+
+	uplevel $body
+
+	_defer_output .debug_abbrev {
+	    # Emit the terminator.
+	    _op .byte 0x0 Terminator
+	    _op .byte 0x0 Terminator
+	}
+
+	define_label $end_label
+    }
+
+    proc _empty_array {name} {
+	upvar $name the_array
+
+	catch {unset the_array}
+	set the_array(_) {}
+	unset the_array(_)
+    }
+
+    # The top-level interface to the DWARF assembler.
+    # FILENAME is the name of the file where the generated assembly
+    # code is written.
+    # BODY is Tcl code to emit the assembly.  It is evaluated via
+    # "eval" -- not uplevel as you might expect, because it is
+    # important to run the body in the Dwarf namespace.
+    #
+    # A typical invocation is something like:
+    #    Dwarf::assemble $file {
+    #        cu 0 2 8 {
+    #            compile_unit {
+    #            ...
+    #            }
+    #        }
+    #        cu 0 2 8 {
+    #        ...
+    #        }
+    #    }
+    proc assemble {filename body} {
+	variable _initialized
+	variable _output_file
+	variable _deferred_output
+	variable _defer
+	variable _label_num
+	variable _strings
+
+	if {!$_initialized} {
+	    _read_constants
+	    set _initialized 1
+	}
+
+	set _output_file [open $filename w]
+	set _cu_count 0
+	_empty_array _deferred_output
+	set _defer ""
+	set _label_num 0
+	_empty_array _strings
+
+	# Not "uplevel" here, because we want to evaluate in this
+	# namespace.  This is somewhat bad because it means we can't
+	# readily refer to outer variables.
+	eval $body
+
+	_write_deferred_output
+
+	catch {close $_output_file}
+	set _output_file {}
+    }
+}
-- 
1.7.7.6


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]