This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

[PATCH 1/1] benchtest: Run benchmark only once for each input


Currently, for each input in the input file, the benchtest framework
runs the microbenchmark in a loop a fixed number of times before it
moves on to the next input. This has the drawback that it can
eliminate the cache and branch-prediction effects that we see in
real-world programs.

This patch changes the framework so that the microbenchmark is run
only once for each input before moving on to the next one.
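
For illustration, the measurement for each input now amounts to timing
a single call of the benchmarked function; if the elapsed time is below
the clock resolution, the call is re-run and the reading is amortized
over the number of calls made. Below is a minimal standalone sketch of
that strategy, using clock_gettime directly instead of the skeleton's
TIMING_* macros; bench_func, NUM_INPUTS and diff_ns are illustrative
stand-ins and not part of this patch:

#define _GNU_SOURCE 1
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NUM_INPUTS 4

/* Hypothetical workload standing in for BENCH_FUNC (v, i).  */
static volatile double sink;

static void
bench_func (int i)
{
  double x = 1.0;
  for (int k = 0; k < (i + 1) * 1000; k++)
    x = x * 1.0000001 + k;
  sink = x;
}

static uint64_t
diff_ns (struct timespec s, struct timespec e)
{
  return (uint64_t) ((e.tv_sec - s.tv_sec) * 1000000000LL
		     + (e.tv_nsec - s.tv_nsec));
}

int
main (void)
{
  for (int i = 0; i < NUM_INPUTS; i++)
    {
      struct timespec start, end;
      uint64_t cur, iters = 0;

      /* Time one call; if the reading is below the clock resolution,
	 keep re-running and amortize the total over all calls made.  */
      clock_gettime (CLOCK_MONOTONIC_RAW, &start);
      do
	{
	  bench_func (i);
	  iters++;
	  clock_gettime (CLOCK_MONOTONIC_RAW, &end);
	  cur = diff_ns (start, end);
	}
      while (cur == 0);

      printf ("input %d: %g ns per call over %llu run(s)\n",
	      i, (double) cur / iters, (unsigned long long) iters);
    }
  return 0;
}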

	* benchtests/bench-skeleton.c (main): Run the benchmark only once
	per input instead of looping over each input multiple times.
	* benchtests/scripts/bench.py: Add a 'runs' member to struct args
	to count the total number of runs for an input. Add the RUNS
	macro to access this count. Adjust the RESULT_ACCUM macro
	accordingly.
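
For reference, the reworked RESULT_ACCUM/RUNS pair reduces to a weighted
running mean of per-call timings. A small standalone sketch of that
arithmetic (the struct and function names here are illustrative and not
taken from the generated code):

#include <stdio.h>

struct sample
{
  double timing;	/* Mean time per call accumulated so far.  */
  int runs;		/* Number of calls folded into that mean.  */
};

/* Fold a raw measurement R covering C calls into the running mean,
   mirroring what RESULT_ACCUM (r, v, i, c) does for variants[v].in[i].  */
static void
result_accum (struct sample *s, double r, int c)
{
  int old = s->runs;

  s->timing = (s->timing * old + r) / (old + c);
  s->runs = old + c;
}

int
main (void)
{
  struct sample s = { 0.0, 0 };

  /* Two single-call readings plus one sub-resolution reading amortized
     over three calls: 100 + 90 + 240 = 430 ns across 5 calls.  */
  result_accum (&s, 100.0, 1);
  result_accum (&s, 90.0, 1);
  result_accum (&s, 240.0, 3);

  printf ("mean = %g ns over %d runs\n", s.timing, s.runs); /* 86 ns, 5.  */
  return 0;
}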
---
 benchtests/bench-skeleton.c | 47 +++++++++++++++++++++------------------------
 benchtests/scripts/bench.py | 12 ++++++++++--
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 09eb78df1b..5707f1da65 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -35,7 +35,7 @@
 int
 main (int argc, char **argv)
 {
-  unsigned long i, k;
+  unsigned long i;
   struct timespec runtime;
   timing_t start, end;
   bool detailed = false;
@@ -48,15 +48,9 @@ main (int argc, char **argv)
 
   memset (&runtime, 0, sizeof (runtime));
 
-  unsigned long iters, res;
-
 #ifdef BENCH_INIT
   BENCH_INIT ();
 #endif
-  TIMING_INIT (res);
-
-  iters = 1000 * res;
-
   json_init (&json_ctx, 2, stdout);
 
   /* Begin function.  */
@@ -68,35 +62,40 @@ main (int argc, char **argv)
       clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
       runtime.tv_sec += DURATION;
 
-      double d_total_i = 0;
-      timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
-      int64_t c = 0;
+      double d_total_i = 0, d_max = 0, d_min = 0x7fffffffffffffff;
+      timing_t total = 0;
       while (1)
 	{
 	  for (i = 0; i < NUM_SAMPLES (v); i++)
 	    {
-	      uint64_t cur;
+	      uint64_t cur, iters = 0;
+	      double d_cur;
+
 	      TIMING_NOW (start);
-	      for (k = 0; k < iters; k++)
-		BENCH_FUNC (v, i);
+	    run_bench:
+	      BENCH_FUNC (v, i);
 	      TIMING_NOW (end);
-
 	      TIMING_DIFF (cur, start, end);
+	      iters++;
+
+	      /* If the benchmark ran more quickly than the clock resolution,
+		 re-run it until the clock can capture the elapsed time.  */
+	      if (cur == 0)
+		goto run_bench;
 
-	      if (cur > max)
-		max = cur;
+	      d_cur = (double)cur / iters;
+	      if (d_cur > d_max)
+		d_max = d_cur;
 
-	      if (cur < min)
-		min = cur;
+	      if (d_cur < d_min)
+		d_min = d_cur;
 
 	      TIMING_ACCUM (total, cur);
 	      /* Accumulate timings for the value.  In the end we will divide
 	         by the total iterations.  */
-	      RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
-
+	      RESULT_ACCUM (cur, v, i, iters);
 	      d_total_i += iters;
 	    }
-	  c++;
 	  struct timespec curtime;
 
 	  memset (&curtime, 0, sizeof (curtime));
@@ -106,19 +105,17 @@ main (int argc, char **argv)
 	}
 
       double d_total_s;
-      double d_iters;
 
     done:
       d_total_s = total;
-      d_iters = iters;
 
       /* Begin variant.  */
       json_attr_object_begin (&json_ctx, VARIANT (v));
 
       json_attr_double (&json_ctx, "duration", d_total_s);
       json_attr_double (&json_ctx, "iterations", d_total_i);
-      json_attr_double (&json_ctx, "max", max / d_iters);
-      json_attr_double (&json_ctx, "min", min / d_iters);
+      json_attr_double (&json_ctx, "max", d_max);
+      json_attr_double (&json_ctx, "min", d_min);
       json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
 
       if (detailed)
diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py
index 8c1c9eeb2b..d9d27e70c7 100755
--- a/benchtests/scripts/bench.py
+++ b/benchtests/scripts/bench.py
@@ -51,6 +51,7 @@ struct args
 {
 %(args)s
   double timing;
+  int runs;
 };
 
 struct _variants
@@ -82,8 +83,15 @@ struct _variants variants[%(num_variants)d] = {
 # Epilogue for the generated source file.
 EPILOGUE = '''
 #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
-#define RESULT_ACCUM(r, v, i, old, new) \\
-        ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
+#define RUNS(__v, __i) (variants[(__v)].in[(__i)].runs)
+#define RESULT_ACCUM(r, v, i, c) \\
+  do \\
+  { \\
+    int old = RUNS ((v), (i)); \\
+    RESULT ((v), (i)) = (RESULT ((v), (i)) * old + (r)) / (old + c); \\
+    RUNS ((v), (i)) = old + c; \\
+  } \\
+  while (0)
 #define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
 #define FUNCNAME "%(func)s"
 #include "bench-skeleton.c"'''
-- 
2.12.2

