This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 2/2] Read memory in multiple lines in dcache_xfer_memory.


Hi, this is an optimization to dcache reading contents from target
memory.  Nowadays, when GDB requests to read target memory and the
requests go through the dcache, the dcache will read one cache line at
a time, regardless of the size of the requested data.  If GDB reads a
large amount of data from the target, the dcache will read from target
memory multiple times (one cache line per read).  In remote debugging,
this means multiple RSP packets to transfer memory from GDBserver,
which is slow.

This patch teaches the dcache to read as much contiguous target memory
as possible in one transfer, and to update multiple cache lines when
the contents are read in.  This is done in several steps:

 1.  When GDB requests to read data [memaddr, memaddr + len), a
collection of ranges is created to record readable ranges, because
some memory may be marked as write-only.
 2.  Then, we'll check the cache state of these readable ranges.  Some
of them are cached, and some are not.  We record the uncached ranges.
 3.  Iterate the collection of uncached ranges, and issue target_read
to read these uncached ranges from the target memory and update cache
lines.  For cached ranges, read from cache lines directly.

I am using the perf test case 'backtrace' to measure the speed-up of
this patch.  For each run, I 'set dcache line-size N' and
'set dcache size' to 4096 * 64 / N, to make sure the total size of the
dcache is unchanged.

With this patch, the number of 'm' RSP packet is reduced
dramatically:

cache line size:	Original        Patched
2			4657894         31224
4       		2317896         28616
8			1158948         21462
16			579474          14308
32			293314          14308
64			150234          14308
128			78694           10738
256			42938           8960
512			25046           8064
1024			16100           7616
2048			9184            7392

Performance comparison:

                        cache line size  Patched Original
backtrace        cpu_time        2       4.44    33.83
backtrace        cpu_time        4       3.88    14.27
backtrace        cpu_time        8       3.1     7.92
backtrace        cpu_time        16      2.48    4.79
backtrace        cpu_time        32      2.25    2.51
backtrace        cpu_time        64      1.16    1.93
backtrace        cpu_time        128     1.02    1.69
backtrace        cpu_time        256     1.06    1.37
backtrace        cpu_time        512     1.11    1.17
backtrace        cpu_time        1024    1.1     1.22
backtrace        cpu_time        2048    1.13    1.17
backtrace        wall_time       2       5.49653506279   74.0839848518
backtrace        wall_time       4       4.70916986465   29.94830513
backtrace        wall_time       8       4.11279582977   15.6743021011
backtrace        wall_time       16      3.68633985519   8.83114910126
backtrace        wall_time       32      3.63511800766   5.79059791565
backtrace        wall_time       64      1.61371517181   3.67003703117
backtrace        wall_time       128     1.50599694252   2.60381913185
backtrace        wall_time       256     1.47533297539   2.05611109734
backtrace        wall_time       512     1.48193001747   1.80505800247
backtrace        wall_time       1024    1.50955080986   1.69646501541
backtrace        wall_time       2048    1.54235315323   1.61461496353
backtrace        vmsize          2       104568          104576
backtrace        vmsize          4       100556          102388
backtrace        vmsize          8       95384   97540
backtrace        vmsize          16      94092   94092
backtrace        vmsize          32      93348   93276
backtrace        vmsize          64      93148   92928
backtrace        vmsize          128     93148   93100
backtrace        vmsize          256     93148   93100
backtrace        vmsize          512     93148   93100
backtrace        vmsize          1024    93148   93100
backtrace        vmsize          2048    93148   93100

gdb:

2013-10-18  Yao Qi  <yao@codesourcery.com>

	* dcache.c: Include "memrange.h".
	Update comments.
	(dcache_read_line): Remove.
	(dcache_peek_byte): Remove.
	(dcache_ranges_readable): New function.
	(dcache_ranges_uncached): New function.
	(dcache_xfer_memory): Read multiple cache lines from target
	memory in one transfer.
---
 gdb/dcache.c |  331 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 244 insertions(+), 87 deletions(-)

diff --git a/gdb/dcache.c b/gdb/dcache.c
index 316f3dd..65bbad1 100644
--- a/gdb/dcache.c
+++ b/gdb/dcache.c
@@ -25,6 +25,7 @@
 #include "target.h"
 #include "inferior.h"
 #include "splay-tree.h"
+#include "memrange.h"
 
 /* Commands with a prefix of `{set,show} dcache'.  */
 static struct cmd_list_element *dcache_set_list = NULL;
@@ -60,8 +61,8 @@ static struct cmd_list_element *dcache_show_list = NULL;
 /* NOTE: Interaction of dcache and memory region attributes
 
    As there is no requirement that memory region attributes be aligned
-   to or be a multiple of the dcache page size, dcache_read_line() and
-   dcache_write_line() must break up the page by memory region.  If a
+   to or be a multiple of the dcache page size, dcache_xfer_memory must
+   break up the page by memory region.  If a
    chunk does not have the cache attribute set, an invalid memory type
    is set, etc., then the chunk is skipped.  Those chunks are handled
    in target_xfer_memory() (or target_xfer_memory_partial()).
@@ -122,8 +123,6 @@ typedef void (block_func) (struct dcache_block *block, void *param);
 
 static struct dcache_block *dcache_hit (DCACHE *dcache, CORE_ADDR addr);
 
-static int dcache_read_line (DCACHE *dcache, struct dcache_block *db);
-
 static struct dcache_block *dcache_alloc (DCACHE *dcache, CORE_ADDR addr);
 
 static void dcache_info (char *exp, int tty);
@@ -305,56 +304,6 @@ dcache_hit (DCACHE *dcache, CORE_ADDR addr)
   return db;
 }
 
-/* Fill a cache line from target memory.
-   The result is 1 for success, 0 if the (entire) cache line
-   wasn't readable.  */
-
-static int
-dcache_read_line (DCACHE *dcache, struct dcache_block *db)
-{
-  CORE_ADDR memaddr;
-  gdb_byte *myaddr;
-  int len;
-  int res;
-  int reg_len;
-  struct mem_region *region;
-
-  len = dcache->line_size;
-  memaddr = db->addr;
-  myaddr  = db->data;
-
-  while (len > 0)
-    {
-      /* Don't overrun if this block is right at the end of the region.  */
-      region = lookup_mem_region (memaddr);
-      if (region->hi == 0 || memaddr + len < region->hi)
-	reg_len = len;
-      else
-	reg_len = region->hi - memaddr;
-
-      /* Skip non-readable regions.  The cache attribute can be ignored,
-         since we may be loading this for a stack access.  */
-      if (region->attrib.mode == MEM_WO)
-	{
-	  memaddr += reg_len;
-	  myaddr  += reg_len;
-	  len     -= reg_len;
-	  continue;
-	}
-      
-      res = target_read (&current_target, TARGET_OBJECT_RAW_MEMORY,
-			 NULL, myaddr, memaddr, reg_len);
-      if (res < reg_len)
-	return 0;
-
-      memaddr += res;
-      myaddr += res;
-      len -= res;
-    }
-
-  return 1;
-}
-
 /* Get a free cache block, put or keep it on the valid list,
    and return its address.  */
 
@@ -395,28 +344,6 @@ dcache_alloc (DCACHE *dcache, CORE_ADDR addr)
   return db;
 }
 
-/* Using the data cache DCACHE, store in *PTR the contents of the byte at
-   address ADDR in the remote machine.  
-
-   Returns 1 for success, 0 for error.  */
-
-static int
-dcache_peek_byte (DCACHE *dcache, CORE_ADDR addr, gdb_byte *ptr)
-{
-  struct dcache_block *db = dcache_hit (dcache, addr);
-
-  if (!db)
-    {
-      db = dcache_alloc (dcache, addr);
-
-      if (!dcache_read_line (dcache, db))
-         return 0;
-    }
-
-  *ptr = db->data[XFORM (dcache, addr)];
-  return 1;
-}
-
 /* Write the byte at PTR into ADDR in the data cache.
 
    The caller is responsible for also promptly writing the data
@@ -473,6 +400,105 @@ dcache_init (void)
   return dcache;
 }
 
+/* Check the readability of memory range [MEMADDR, MEMADDR + LEN) and
+   return the readable ranges; the caller is responsible for releasing it.  */
+
+static VEC(mem_range_s) *
+dcache_ranges_readable (CORE_ADDR memaddr, int len)
+{
+  VEC(mem_range_s) *readable_memory = NULL;
+
+  while (len > 0)
+    {
+      struct mem_range *r;
+      int reg_len;
+      /* Don't overrun if this block is right at the end of the region.  */
+      struct mem_region *region = lookup_mem_region (memaddr);
+
+      if (region->hi == 0 || memaddr + len < region->hi)
+	reg_len = len;
+      else
+	reg_len = region->hi - memaddr;
+
+      /* Skip non-readable regions.  The cache attribute can be ignored,
+	 since we may be loading this for a stack access.  */
+      if (region->attrib.mode == MEM_WO)
+	{
+	  memaddr += reg_len;
+	  len -= reg_len;
+	  continue;
+	}
+
+      r = VEC_safe_push (mem_range_s, readable_memory, NULL);
+      r->start = memaddr;
+      r->length = reg_len;
+
+      memaddr += reg_len;
+      len -= reg_len;
+    }
+
+  return readable_memory;
+}
+
+/* Return the uncached ranges from RANGES.   */
+
+static VEC(mem_range_s) *
+dcache_ranges_uncached (DCACHE *dcache, VEC(mem_range_s) *ranges)
+{
+  int b;
+  struct mem_range *rb;
+  VEC(mem_range_s) *uncached = NULL;
+
+  for (b = 0; VEC_iterate (mem_range_s, ranges, b, rb); b++)
+    {
+      CORE_ADDR memaddr_start = rb->start;
+      CORE_ADDR memaddr_end = rb->start;
+
+      while (memaddr_end < rb->start + rb->length)
+	{
+	  struct dcache_block *db = dcache_hit (dcache, memaddr_end);
+
+	  if (db != NULL)
+	    {
+	      /* Set MEMADDR_END to the start address of this cache line.  */
+	      memaddr_end = align_down (memaddr_end, dcache->line_size);
+
+	      if (memaddr_end > memaddr_start)
+		{
+		  struct mem_range *r;
+
+		  r = VEC_safe_push (mem_range_s, uncached, NULL);
+		  r->start = memaddr_start;
+		  r->length = memaddr_end - memaddr_start;
+		}
+	    }
+
+	  /* Increase memaddr_end to a dcache->line_size-aligned value.  */
+	  if (memaddr_end < align_up (memaddr_end, dcache->line_size))
+	    memaddr_end = align_up (memaddr_end, dcache->line_size);
+	  else
+	    memaddr_end += dcache->line_size;
+
+	  if (db != NULL)
+	    memaddr_start = memaddr_end;
+	}
+
+      if (memaddr_end > rb->start + rb->length)
+	memaddr_end = rb->start + rb->length;
+
+      if (memaddr_start < memaddr_end)
+	{
+	  struct mem_range *r;
+
+	  r = VEC_safe_push (mem_range_s, uncached, NULL);
+
+	  r->start = memaddr_start;
+	  r->length = memaddr_end - memaddr_start;
+	}
+    }
+
+  return uncached;
+}
 
 /* Read or write LEN bytes from inferior memory at MEMADDR, transferring
    to or from debugger address MYADDR.  Write to inferior if SHOULD_WRITE is
@@ -489,9 +515,6 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
 		    CORE_ADDR memaddr, gdb_byte *myaddr,
 		    int len, int should_write)
 {
-  int i;
-  int res;
-
   /* If this is a different inferior from what we've recorded,
      flush the cache.  */
 
@@ -506,8 +529,10 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
 
   if (should_write)
     {
-      res = target_write (ops, TARGET_OBJECT_RAW_MEMORY,
-			  NULL, myaddr, memaddr, len);
+      int res = target_write (ops, TARGET_OBJECT_RAW_MEMORY,
+			      NULL, myaddr, memaddr, len);
+      int i;
+
       if (res <= 0)
 	return res;
       /* Update LEN to what was actually written.  */
@@ -527,16 +552,148 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
     }
   else
     {
-      for (i = 0; i < len; i++)
+      int i;
+      struct mem_range *r;
+      /* The starting address of each cached range.  */
+      CORE_ADDR cached_addr = memaddr;
+
+      VEC(mem_range_s) *memory;
+      VEC(mem_range_s) *uncached = NULL;
+
+      /* Find readable ranges in range [MEMADDR, MEMADDR + LEN),
+	 supposing write-only regions are wo1 and wo2.  Then,
+	 readable ranges are r1, r2 and r3.
+
+	 MEMADDR                               MEMADDR + LEN
+	 |<------------------------------------------------->|
+		|<-- wo1 -->|        |<-- wo2 -->|
+
+	 |<-r1->|           |<--r2-->|           |<---r3---->|  */
+      memory = dcache_ranges_readable (memaddr, len);
+
+      /* GDB will read from these three readable ranges, r1, r2 and r3.
+	 GDB has to check the corresponding cache lines' state (cached
+	 or uncached) to determine whether to read from the target
+	 memory or the cache lines.
+
+	 MEMADDR                               MEMADDR + LEN
+	 |<------------------------------------------------->|
+		|<-- wo1 -->|        |<-- wo2 -->|
+
+	 |<-r1->|           |<--r2-->|           |<---r3---->|
+
+	 -u-|-----c----|-----u----|-----c----|-----c----|--u--
+	 'u' stands for uncached, 'c' stands for cached.
+
+	 |u1|-c1-|          |  u2 |c2|           |--c3--| u3  |
+
+	 Uncached ranges are u1, u2 and u3, and cached ranges are c1,
+	 c2 and c3.  */
+      uncached = dcache_ranges_uncached (dcache, memory);
+
+      VEC_free (mem_range_s, memory);
+
+      /* Iterate each uncached range.  Read memory from cache lines if
+	 memory address is not within the uncached range, otherwise, read
+	 from the target memory and update corresponding cache lines.  */
+
+      for (i = 0; VEC_iterate (mem_range_s, uncached, i, r); i++)
 	{
-	  if (!dcache_peek_byte (dcache, memaddr + i, myaddr + i))
+	  int j;
+
+	  if (cached_addr < r->start)
 	    {
-	      /* That failed.  Discard its cache line so we don't have a
-		 partially read line.  */
-	      dcache_invalidate_line (dcache, memaddr + i);
-	      return i;
+	      /* Read memory [cached_addr, MIN (r->start, MEMADDR + LEN))
+		 from cache lines.  */
+
+	      for (; cached_addr < r->start && cached_addr < (memaddr + len);
+		   cached_addr++)
+		{
+		  struct dcache_block *db = dcache_hit (dcache, cached_addr);
+
+		  gdb_assert (db != NULL);
+
+		  myaddr[cached_addr - memaddr]
+		    = db->data[XFORM (dcache, cached_addr)];
+		}
+	    }
+	  cached_addr = r->start + r->length;
+
+	  /* Part of the memory range [MEMADDR, MEMADDR + LEN) is
+	     not cached.  */
+	  if (r->start < len + memaddr)
+	    {
+	      /* MEMADDR_START and MEMADDR_END are aligned on
+		 dcache->line_size, because dcache->line_size is the
+		 minimal unit to update cache and fetch from the target
+		 memory.  */
+	      CORE_ADDR memaddr_start
+		= align_down (r->start, dcache->line_size);
+	      CORE_ADDR memaddr_end
+		= align_up (r->start + r->length, dcache->line_size);
+	      int res;
+	      int len1 = memaddr_end - memaddr_start;
+	      int len2;
+	      gdb_byte *buf = xmalloc (len1);
+
+	      /* Read multiple cache lines to cover memory range
+		 [r->start, r->start + MIN (r->length,
+		 LEN + MEMADDR - r->start)) from target.  */
+
+	      res = target_read (&current_target, TARGET_OBJECT_RAW_MEMORY,
+				 NULL, buf, memaddr_start, len1);
+
+	      if (res == -1)
+		{
+		  VEC_free (mem_range_s, uncached);
+		  xfree (buf);
+		  return r->start - memaddr;
+		}
+
+	      /* Copy contents to MYADDR.  */
+	      len2 = r->length;
+	      if (len2 > len + memaddr - r->start)
+		len2 = len + memaddr - r->start;
+
+	      memcpy ((r->start - memaddr) + myaddr,
+		      buf + (r->start - memaddr_start),
+		      len2);
+
+	      /* Update cache lines in range
+		 [MEMADDR_START, MEMADDR_START + LEN1).  */
+	      for (j = 0; j < (len1 / dcache->line_size); j++)
+		{
+		  struct dcache_block *db
+		    = dcache_hit (dcache, memaddr_start + j * dcache->line_size);
+
+		  gdb_assert (db == NULL);
+
+		  db = dcache_alloc (dcache, memaddr_start + j * dcache->line_size);
+
+		  memcpy (db->data, &buf[j * dcache->line_size], dcache->line_size);
+		}
+
+	      xfree (buf);
+
+	      if (res < len1)
+		{
+		  VEC_free (mem_range_s, uncached);
+		  return r->start - memaddr + res;
+		}
 	    }
 	}
+
+      VEC_free (mem_range_s, uncached);
+
+      for (; cached_addr < (memaddr + len); cached_addr++)
+	{
+	  struct dcache_block *db = dcache_hit (dcache, cached_addr);
+
+	  gdb_assert (db != NULL);
+
+	  myaddr[cached_addr - memaddr]
+	    = db->data[XFORM (dcache, cached_addr)];
+	}
     }
 
   return len;
-- 
1.7.7.6


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]