This is the mail archive of the gdb-patches@sources.redhat.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFA]: dwarf2expr.[ch]


Daniel Berlin <dberlin@dberlin.org> writes:
> I also can't remember why get_subr was needed, and never implemented it.
> Refresh my memory if you could, since you wrote the spec?
> :)

It's for DW_OP_call*, which the evaluator here doesn't implement.

> I've only reposted it for completeness sake, i'll include it again when i 
> revise the loc_computed patch.

Actually, let's keep this evaluator discussion separate --- we can get
this reviewed and committed without dealing with anything else.
Nothing will call it immediately, but that's fine.

> Index: dwarf2expr.h
> ===================================================================
> RCS file: dwarf2expr.h
> diff -N dwarf2expr.h
> *** /dev/null	1 Jan 1970 00:00:00 -0000
> --- dwarf2expr.h	8 Jul 2002 23:41:18 -0000
> ***************
> *** 0 ****
> --- 1,57 ----
> + #if !defined (DWARF2EXPR_H)
> + #define DWARF2EXPR_H
> + struct dwarf_expr_context
> + {
> +   /* The stack of values, allocated with xmalloc.  */
> +   CORE_ADDR *stack;
> + 
> +   /* The number of values currently pushed on the stack, and the
> +      number of elements allocated to the stack.  */
> +   int stack_len, stack_allocated;
> + 
> +   /* The size of an address, in bits.  */
> +   int addr_size;

I think the evaluator should either consistently avoid depending on
GDB's facilities, or it should just use them.  We're already using the
CORE_ADDR typedef.  The evaluator code uses extract_unsigned_integer,
which consults the current gdbarch to choose an endianness.  It's
inconsistent to have the addr_size here.  So this should probably be
deleted.

> + 
> +   /* Return the value of register number REGNUM.  */
> +   CORE_ADDR (*read_reg) (void *baton, int regnum);
> +   void *read_reg_baton;
> + 
> +   /* Return the LEN-byte value at ADDR.  */
> +   CORE_ADDR (*read_mem) (void *baton, CORE_ADDR addr, size_t len);
> +   void *read_mem_baton;
> + 
> +   /* Return the location expression for the frame base attribute.  The
> +      result must be live until the current expression evaluation is
> +      complete.  */
> +   void (*get_frame_base) (void *baton, unsigned char **begin, 
> + 			  unsigned char **end);
> +   void *get_frame_base_baton;

This should be a start and length, to be consistent with
dwarf_expr_eval, and most of the rest of GDB.

> + 
> +   /* Return the location expression for the dwarf expression
> +      subroutine in the die at OFFSET in the current compilation unit.
> +      The result must be live until the current expression evaluation
> +      is complete.  */
> +   unsigned char *(*get_subr) (void *baton, off_t offset);
> +   void *get_subr_baton;

This should probably return a start and length, too.

> + 
> +   /* Return the `object address' for DW_OP_push_object_address.  */
> +   CORE_ADDR (*get_object_address) (void *baton);
> +   void *get_object_address_baton;
> + 
> +   /* The current depth of dwarf expression recursion, via DW_OP_call*,
> +      DW_OP_fbreg, DW_OP_push_object_address, etc., and the maximum
> +      depth we'll tolerate before raising an error.  */
> +   int recursion_depth, max_recursion_depth;
> + 
> +   int in_reg;

This needs a comment.  I assume it's for location expressions.

> +   void (*error) (const char *fmt, ...);

Since we're already using CORE_ADDR, TARGET_PTR_BIT, TARGET_CHAR_BIT,
and extract_{,un}signed_integer, we might as well just use GDB's error
function, too.  So this could be deleted.

> + };
> + struct dwarf_expr_context * new_dwarf_expr_context();
> + void free_dwarf_expr_context (struct dwarf_expr_context *ctx);
> + void dwarf_expr_push (struct dwarf_expr_context *ctx, CORE_ADDR value);
> + void dwarf_expr_pop (struct dwarf_expr_context *ctx);
> + void dwarf_expr_eval (struct dwarf_expr_context *ctx, unsigned char *addr, 
> + 		      size_t len);
> + CORE_ADDR dwarf_expr_fetch (struct dwarf_expr_context *ctx, int n);

It's be nice if these had comments.  Didn't I write any in my original
spec?


> Index: dwarf2expr.c
> ===================================================================
> RCS file: dwarf2expr.c
> diff -N dwarf2expr.c
> *** /dev/null	1 Jan 1970 00:00:00 -0000
> --- dwarf2expr.c	8 Jul 2002 23:41:18 -0000
> ***************
> *** 0 ****
> --- 1,479 ----
> + 
> + #include "defs.h"
> + #include "symtab.h"
> + #include "gdbtypes.h"
> + #include "elf/dwarf2.h"
> + #include "dwarf2expr.h"
> + struct dwarf_expr_context * new_dwarf_expr_context()
> + {
> +   struct dwarf_expr_context *retval;
> +   retval = xcalloc (1, sizeof (struct dwarf_expr_context));
> +   return retval;
> + }
> + void free_dwarf_expr_context (struct dwarf_expr_context *ctx)
> + {
> +   free (ctx->stack);
> +   free (ctx);  
> + }

I think these need to be calls to xfree.  Among other things, xfree
won't mind if ctx->stack is zero.

> + static void dwarf_expr_grow_stack (struct dwarf_expr_context *ctx, size_t need)
> + {
> +   if (ctx->stack_len + need > ctx->stack_allocated)
> +     {
> +       ctx->stack = xrealloc (ctx->stack, 
> + 			     (ctx->stack_len + need) * sizeof (CORE_ADDR));
> +       ctx->stack_allocated = ctx->stack_len + need;
> +     }
> + }

Since the initial stack size is zero, this is going to reallocate the
stack on every push at first.  Allocating an initial stack, and
doubling it until big enough when we need more space would be best.

> + void dwarf_expr_push (struct dwarf_expr_context *ctx, CORE_ADDR value)
> + {
> +   dwarf_expr_grow_stack (ctx, 1);
> +   ctx->stack[ctx->stack_len++] = value;
> + }
> + void dwarf_expr_pop (struct dwarf_expr_context *ctx)
> + {
> +   ctx->stack_len--;
> + }

This should check for an empty stack, to protect GDB from broken
compilers.  I'll grant there's a lot of error checking missing from
both the STABS and Dwarf 2 readers, but we should still do it.

> + static void execute_stack_op (struct dwarf_expr_context *, 
> + 			      unsigned char *, 
> + 			      unsigned char *);
> + void dwarf_expr_eval (struct dwarf_expr_context *ctx, unsigned char *addr, size_t len)
> + {
> +   execute_stack_op (ctx, addr, addr + len);
> + }
> + 
> + CORE_ADDR dwarf_expr_fetch (struct dwarf_expr_context *ctx, int n)
> + {
> +   if (ctx->stack_len < n)
> +     (ctx->error) ("Asked for position %d of stack, stack only has %d elements on it\n", 
> + 		  n, ctx->stack_len);
> +   return ctx->stack[ctx->stack_len - (1+n)];
> + 
> + }     

This should check for underflow, too.  Look at what DW_OP_rot will do
on an empty stack.

> + /* Decode the unsigned LEB128 constant at BUF into the variable pointed to
> +    by R, and return the new value of BUF.  */
> + 
> + static unsigned char *
> + read_uleb128 (unsigned char *buf, ULONGEST *r)
> + {
> +   unsigned shift = 0;
> +   ULONGEST result = 0;
> +   
> +   while (1)
> +     {
> +       unsigned char byte = *buf++;
> +       result |= (byte & 0x7f) << shift;
> +       if ((byte & 0x80) == 0)
> + 	break;
> +       shift += 7;
> +     }
> +   *r = result;
> +   return buf;
> + }
> + 
> + /* Decode the signed LEB128 constant at BUF into the variable pointed to
> +    by R, and return the new value of BUF.  */
> + 
> + static unsigned char *
> + read_sleb128 (unsigned char *buf, LONGEST *r)
> + {
> +   unsigned shift = 0;
> +   LONGEST result = 0;
> +   unsigned char byte;
> +   
> +   while (1)
> +     {
> +       byte = *buf++;
> +       result |= (byte & 0x7f) << shift;
> +       shift += 7;
> +       if ((byte & 0x80) == 0)
> + 	break;
> +     }
> +   if (shift < (sizeof (*r) * 8) && (byte & 0x40) != 0)
> +     result |= - (1 << shift);
> +   
> +   *r = result;
> +   return buf;
> + }
> + static void
> + execute_stack_op (struct dwarf_expr_context *ctx, unsigned char *op_ptr, 
> + 		  unsigned char *op_end)
> + {
> +   while (op_ptr < op_end)
> +     {
> +       enum dwarf_location_atom op = *op_ptr++;      
> +       ULONGEST result, reg;
> +       LONGEST offset;
> +       ctx->in_reg = 0;
> +       switch (op)
> + 	{
> + 	case DW_OP_lit0:
> + 	case DW_OP_lit1:
> + 	case DW_OP_lit2:
> + 	case DW_OP_lit3:
> + 	case DW_OP_lit4:
> + 	case DW_OP_lit5:
> + 	case DW_OP_lit6:
> + 	case DW_OP_lit7:
> + 	case DW_OP_lit8:
> + 	case DW_OP_lit9:
> + 	case DW_OP_lit10:
> + 	case DW_OP_lit11:
> + 	case DW_OP_lit12:
> + 	case DW_OP_lit13:
> + 	case DW_OP_lit14:
> + 	case DW_OP_lit15:
> + 	case DW_OP_lit16:
> + 	case DW_OP_lit17:
> + 	case DW_OP_lit18:
> + 	case DW_OP_lit19:
> + 	case DW_OP_lit20:
> + 	case DW_OP_lit21:
> + 	case DW_OP_lit22:
> + 	case DW_OP_lit23:
> + 	case DW_OP_lit24:
> + 	case DW_OP_lit25:
> + 	case DW_OP_lit26:
> + 	case DW_OP_lit27:
> + 	case DW_OP_lit28:
> + 	case DW_OP_lit29:
> + 	case DW_OP_lit30:
> + 	case DW_OP_lit31:
> + 	  result = op - DW_OP_lit0;
> + 	  break;
> +           
> + 	case DW_OP_addr:
> + 	  result = (CORE_ADDR) 
> + 	    extract_unsigned_integer (op_ptr, 
> + 				      TARGET_PTR_BIT / TARGET_CHAR_BIT);
> + 	  op_ptr += TARGET_PTR_BIT / TARGET_CHAR_BIT;

I think the evaluator needs to use TARGET_ADDR_BIT throughout, not
TARGET_PTR_BIT.  On machines with separate code and data address
spaces, we may have (say) 16-bit addresses (TARGET_PTR_BIT == 16), but
use a 32-bit unified address space for linking (TARGET_ADDR_BIT ==
32).  Certainly Dwarf expressions need to work with addresses, not
pointers.

> + 	  break;
> +           
> + 	case DW_OP_const1u:
> + 	  result = extract_unsigned_integer (op_ptr, 1);
> + 	  op_ptr += 1;
> + 	  break;
> + 	case DW_OP_const1s:
> + 	  result = extract_signed_integer (op_ptr, 1);
> + 	  op_ptr += 1;
> + 	  break;
> + 	case DW_OP_const2u:
> + 	  result = extract_unsigned_integer (op_ptr, 2);
> + 	  op_ptr += 2;
> + 	  break;
> + 	case DW_OP_const2s:
> + 	  result = extract_signed_integer (op_ptr, 2);
> + 	  op_ptr += 2;
> + 	  break;
> + 	case DW_OP_const4u:
> + 	  result = extract_unsigned_integer (op_ptr, 4);
> + 	  op_ptr += 4;
> + 	  break;
> + 	case DW_OP_const4s:
> + 	  result = extract_signed_integer (op_ptr, 4);
> + 	  op_ptr += 4;
> + 	  break;
> + 	case DW_OP_const8u:
> + 	  result = extract_unsigned_integer (op_ptr, 8);
> + 	  op_ptr += 8;
> + 	  break;
> + 	case DW_OP_const8s:
> + 	  result = extract_signed_integer (op_ptr, 8);
> + 	  op_ptr += 8;
> + 	  break;
> + 	case DW_OP_constu:
> + 	  op_ptr = read_uleb128 (op_ptr, &result);
> + 	  break;
> + 	case DW_OP_consts:
> + 	  op_ptr = read_sleb128 (op_ptr, &offset);
> + 	  result = offset;
> + 	  break;
> + 	  
> + 	case DW_OP_reg0:
> + 	case DW_OP_reg1:
> + 	case DW_OP_reg2:
> + 	case DW_OP_reg3:
> + 	case DW_OP_reg4:
> + 	case DW_OP_reg5:
> + 	case DW_OP_reg6:
> + 	case DW_OP_reg7:
> + 	case DW_OP_reg8:
> + 	case DW_OP_reg9:
> + 	case DW_OP_reg10:
> + 	case DW_OP_reg11:
> + 	case DW_OP_reg12:
> + 	case DW_OP_reg13:
> + 	case DW_OP_reg14:
> + 	case DW_OP_reg15:
> + 	case DW_OP_reg16:
> + 	case DW_OP_reg17:
> + 	case DW_OP_reg18:
> + 	case DW_OP_reg19:
> + 	case DW_OP_reg20:
> + 	case DW_OP_reg21:
> + 	case DW_OP_reg22:
> + 	case DW_OP_reg23:
> + 	case DW_OP_reg24:
> + 	case DW_OP_reg25:
> + 	case DW_OP_reg26:
> + 	case DW_OP_reg27:
> + 	case DW_OP_reg28:
> + 	case DW_OP_reg29:
> + 	case DW_OP_reg30:
> + 	case DW_OP_reg31:      
> + 	  {
> + 	    result = (ctx->read_reg) (ctx->read_reg_baton, op - DW_OP_reg0);
> + 	    ctx->in_reg = 1;
> + 	  }
> + 	  break;
> + 	case DW_OP_regx:  
> + 	  {
> + 	    op_ptr = read_uleb128 (op_ptr, &reg);
> + 	    result = (ctx->read_reg) (ctx->read_reg_baton, reg);
> + 	    ctx->in_reg = 1;
> + 	  }
> + 	  break;

This isn't right.  For DW_OP_reg*, we have to indicate to the caller
that the result is in a register, and give them the register number.
This leaves the *contents* of the register on the stack, so there's no
way for the caller to get the register number.

Handling DW_OP_piece is going to be a pain.  We do need to handle it...

> + 	case DW_OP_breg0:
> + 	case DW_OP_breg1:
> + 	case DW_OP_breg2:
> + 	case DW_OP_breg3:
> + 	case DW_OP_breg4:
> + 	case DW_OP_breg5:
> + 	case DW_OP_breg6:
> + 	case DW_OP_breg7:
> + 	case DW_OP_breg8:
> + 	case DW_OP_breg9:
> + 	case DW_OP_breg10:
> + 	case DW_OP_breg11:
> + 	case DW_OP_breg12:
> + 	case DW_OP_breg13:
> + 	case DW_OP_breg14:
> + 	case DW_OP_breg15:
> + 	case DW_OP_breg16:
> + 	case DW_OP_breg17:
> + 	case DW_OP_breg18:
> + 	case DW_OP_breg19:
> + 	case DW_OP_breg20:
> + 	case DW_OP_breg21:
> + 	case DW_OP_breg22:
> + 	case DW_OP_breg23:
> + 	case DW_OP_breg24:
> + 	case DW_OP_breg25:
> + 	case DW_OP_breg26:
> + 	case DW_OP_breg27:
> + 	case DW_OP_breg28:
> + 	case DW_OP_breg29:
> + 	case DW_OP_breg30:
> + 	case DW_OP_breg31:
> + 	  {           
> + 	    op_ptr = read_sleb128 (op_ptr, &offset);
> + 	    result = (ctx->read_reg) (ctx->read_reg_baton, op - DW_OP_breg0);
> + 	    result += offset;
> + 	  }
> + 	  break;
> + 	case DW_OP_bregx:
> + 	  {
> + 	    op_ptr = read_uleb128 (op_ptr, &reg);
> + 	    op_ptr = read_sleb128 (op_ptr, &offset);
> + 	    result = (ctx->read_reg) (ctx->read_reg_baton, reg);
> + 	    result += offset;
> + 	  }
> + 	  break;
> + 	case DW_OP_fbreg:
> + 	  {
> + 	    unsigned char *datastart;
> + 	    unsigned char *dataend;
> + 	    unsigned int before_stack_len;
> + 	    
> + 	    op_ptr = read_sleb128 (op_ptr, &offset);
> + 	    /* Rather than create a whole new context, we simply
> + 	       record the stack length before execution, then reset it
> + 	       afterwards, effectively erasing whatever the recursive
> + 	       call put there.  */
> + 	    before_stack_len = ctx->stack_len;
> + 	    (ctx->get_frame_base)(ctx->get_frame_base_baton, &datastart, &dataend);
> +             dwarf_expr_eval (ctx, datastart, dataend - datastart);

Using START, LEN consistently throughout would clean up little things
like these.

> +             result = dwarf_expr_fetch (ctx, 0) + offset;
> + 	    ctx->stack_len = before_stack_len;
> + 	    ctx->in_reg = 0;
> + 	  }
> + 	  break;
> + 	case DW_OP_dup:
> + 	  result = dwarf_expr_fetch (ctx, 0);
> + 	  break;
> +           
> + 	case DW_OP_drop:
> +           dwarf_expr_pop(ctx);
> + 	  goto no_push;
> +           
> + 	case DW_OP_pick:
> + 	  offset = *op_ptr++;
> + 	  result = dwarf_expr_fetch (ctx, offset);
> + 	  break;
> +           
> + 	case DW_OP_over:
> + 	  result = dwarf_expr_fetch (ctx, 1);
> + 	  break;
> + 	  
> + 	case DW_OP_rot:
> + 	  {
> + 	    CORE_ADDR t1, t2, t3;
> +             
> + 	    if (ctx->stack_len < 3)
> + 	      (ctx->error) ("Not enough elements for DW_OP_rot. Need 3, have %d\n", 
> + 			    ctx->stack_len);
> + 	    t1 = ctx->stack[ctx->stack_len - 1];
> + 	    t2 = ctx->stack[ctx->stack_len - 2];
> + 	    t3 = ctx->stack[ctx->stack_len - 3];
> + 	    ctx->stack[ctx->stack_len - 1] = t2;
> + 	    ctx->stack[ctx->stack_len - 2] = t3;
> + 	    ctx->stack[ctx->stack_len - 3] = t1;
> + 	    goto no_push;
> + 	  }
> +           
> + 	case DW_OP_deref:
> + 	case DW_OP_deref_size:
> + 	case DW_OP_abs:
> + 	case DW_OP_neg:
> + 	case DW_OP_not:
> + 	case DW_OP_plus_uconst:
> + 	  /* Unary operations.  */
> + 	  result = dwarf_expr_fetch (ctx, 0);
> + 	  dwarf_expr_pop (ctx);
> + 
> + 	  switch (op)
> + 	    {
> + 	    case DW_OP_deref:
> + 	      {
> + 		result = (CORE_ADDR) 
> + 		  (ctx->read_mem) (ctx->read_mem_baton, 
> + 				   result, 
> + 				   TARGET_PTR_BIT / TARGET_CHAR_BIT);

Since CORE_ADDR may be wider than the target's address, I think this
should mask off and/or sign extend as appropriate, depending on the
current gdbarch.  Same anywhere we call ctx->read_mem, I think.

> + 	    case DW_OP_deref_size:
> + 	      {
> + 		result = (ctx->read_mem) (ctx->read_mem_baton, result, *op_ptr++);		
> + 	      }
> + 	      break;
> +               
> + 	    case DW_OP_abs:
> + 	      if ((signed int) result < 0)
> + 		result = -result;
> + 	      break;
> + 	    case DW_OP_neg:
> + 	      result = -result;
> + 	      break;
> + 	    case DW_OP_not:
> + 	      result = ~result;
> + 	      break;
> + 	    case DW_OP_plus_uconst:
> + 	      op_ptr = read_uleb128 (op_ptr, &reg);
> + 	      result += reg;
> + 	      break;
> + 	    }
> + 	  break;
> +           
> + 	case DW_OP_and:
> + 	case DW_OP_div:
> + 	case DW_OP_minus:
> + 	case DW_OP_mod:
> + 	case DW_OP_mul:
> + 	case DW_OP_or:
> + 	case DW_OP_plus:
> + 	case DW_OP_le:
> + 	case DW_OP_ge:
> + 	case DW_OP_eq:
> + 	case DW_OP_lt:
> + 	case DW_OP_gt:
> + 	case DW_OP_ne:
> + 	  {
> + 	    /* Binary operations.  */
> + 	    CORE_ADDR first, second;
> + 	    second = dwarf_expr_fetch (ctx, 0);
> + 	    first = dwarf_expr_fetch (ctx, 1);
> + 	    dwarf_expr_pop (ctx);
> + 	    dwarf_expr_pop (ctx);
> + 	    switch (op)
> + 	      {
> + 	      case DW_OP_and:
> + 		result = second & first;
> + 		break;
> + 	      case DW_OP_div:
> + 		result = (LONGEST)second / (LONGEST)first;
> + 		break;

This needs to mask off any bits of the CORE_ADDR above the current
target's address size.

> + 	      case DW_OP_minus:
> + 		result = second - first;
> + 		break;
> + 	      case DW_OP_mod:
> + 		result = (LONGEST)second % (LONGEST)first;
> + 		break;

Needs to mask here, too.

> + 	      case DW_OP_mul:
> + 		result = second * first;
> + 		break;
> + 	      case DW_OP_or:
> + 		result = second | first;
> + 		break;
> + 	      case DW_OP_plus:
> + 		result = second + first;
> + 		break;
> + 	      case DW_OP_shl:
> + 		result = second << first;
> + 		break;
> + 	      case DW_OP_shr:
> + 		result = second >> first;
> + 		break;
> + 	      case DW_OP_shra:
> + 		result = (LONGEST)second >> first;
> + 		break;

shr and shra need to mask.

> + 	      case DW_OP_xor:
> + 		result = second ^ first;
> + 		break;
> + 	      case DW_OP_le:
> + 		result = (LONGEST)first <= (LONGEST)second;
> + 		break;
> + 	      case DW_OP_ge:
> + 		result = (LONGEST)first >= (LONGEST)second;
> + 		break;
> + 	      case DW_OP_eq:
> + 		result = (LONGEST)first == (LONGEST)second;
> + 		break;
> + 	      case DW_OP_lt:
> + 		result = (LONGEST)first < (LONGEST)second;
> + 		break;
> + 	      case DW_OP_gt:
> + 		result = (LONGEST)first > (LONGEST)second;
> + 		break;
> + 	      case DW_OP_ne:
> + 		result = (LONGEST)first != (LONGEST)second;
> + 		break;

All the comparison operators need to mask.

> + 	      }
> + 	  }
> + 	  break;
> +           
> + 	case DW_OP_skip:
> + 	  offset = extract_signed_integer (op_ptr, 2);
> + 	  op_ptr += 2;
> + 	  op_ptr += offset;
> + 	  goto no_push;
> +           
> + 	case DW_OP_bra:
> + 	  offset = extract_signed_integer (op_ptr, 2);
> + 	  op_ptr += 2;
> + 	  if (dwarf_expr_fetch (ctx, 0) != 0)
> + 	    op_ptr += offset;
> + 	  dwarf_expr_pop (ctx);
> + 	  goto no_push;
> +           
> + 	case DW_OP_nop:
> + 	  goto no_push;
> +           
> + 	default:
> + 	  abort ();

This should be a call to internal_error; GDB doesn't generally call abort.

> +          }
> +       
> +       /* Most things push a result value.  */
> +       dwarf_expr_push (ctx, result);
> +     no_push:;
> +     }   
> + }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]