This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Improve math benchmark infrastructure
On Monday 19 June 2017 10:22 PM, Wilco Dijkstra wrote:
> Done, see below:
>
> Improve support for math function benchmarking. This patch adds
> a feature that allows accurate benchmarking of traces extracted
> from real workloads. This is done by iterating over all samples
> rather than repeating each sample many times (which completely
> ignores branch prediction and cache effects). A trace can be
> added to existing math function inputs via
> "## name: workload-<name>", followed by the trace.
>
> OK for commit?
OK with a tiny nit fix below.
> ChangeLog:
> 2017-06-19 Wilco Dijkstra <wdijkstr@arm.com>
>
> * benchtests/README: Describe workload feature.
> * benchtests/bench-skeleton.c (main): Add support for
> benchmarking traces from workloads.
> --
> diff --git a/benchtests/README b/benchtests/README
> index 2c5f38113593ea7da90895266c8fd523fa21c5a1..67333707d5bbc2c6cf5a4de5698c18dfdf086076 100644
> --- a/benchtests/README
> +++ b/benchtests/README
> @@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this:
> See the pow-inputs file for an example of what such a partitioned input file
> would look like.
>
> +It is also possible to measure throughput of a (partial) trace extracted from
> +a real workload. In this case the whole trace is iterated over 'iter' times
"the whole trace is iterated over multiple times"
would read better, since 'iter' is not defined here.
> +rather than repeating every input multiple times. This can be done via:
> +
> + ##name: workload-<name>
> +
> Benchmark Sets:
> ==============
>
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
> index 09eb78df1bce2d9f5e410e3e82821eb9b271e70d..8c98ed673c055a5cf4d774604eb7bf0a383cecb2 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -68,34 +68,50 @@ main (int argc, char **argv)
> clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
> runtime.tv_sec += DURATION;
>
> + bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
> double d_total_i = 0;
> timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
> int64_t c = 0;
> + uint64_t cur;
> while (1)
> {
> - for (i = 0; i < NUM_SAMPLES (v); i++)
> + if (is_bench)
> {
> - uint64_t cur;
> + /* Benchmark a real trace of calls - all samples are iterated
> + over once before repeating. This models actual use more
> + accurately than repeating the same sample many times. */
> TIMING_NOW (start);
> for (k = 0; k < iters; k++)
> - BENCH_FUNC (v, i);
> + for (i = 0; i < NUM_SAMPLES (v); i++)
> + BENCH_FUNC (v, i);
> TIMING_NOW (end);
> -
> TIMING_DIFF (cur, start, end);
> + TIMING_ACCUM (total, cur);
> + d_total_i += iters * NUM_SAMPLES (v);
> + }
> + else
> + for (i = 0; i < NUM_SAMPLES (v); i++)
> + {
> + TIMING_NOW (start);
> + for (k = 0; k < iters; k++)
> + BENCH_FUNC (v, i);
> + TIMING_NOW (end);
>
> - if (cur > max)
> - max = cur;
> + TIMING_DIFF (cur, start, end);
>
> - if (cur < min)
> - min = cur;
> + if (cur > max)
> + max = cur;
>
> - TIMING_ACCUM (total, cur);
> - /* Accumulate timings for the value. In the end we will divide
> - by the total iterations. */
> - RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
> + if (cur < min)
> + min = cur;
>
> - d_total_i += iters;
> - }
> + TIMING_ACCUM (total, cur);
> + /* Accumulate timings for the value. In the end we will divide
> + by the total iterations. */
> + RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
> +
> + d_total_i += iters;
> + }
> c++;
> struct timespec curtime;
>
> @@ -117,11 +133,18 @@ main (int argc, char **argv)
>
> json_attr_double (&json_ctx, "duration", d_total_s);
> json_attr_double (&json_ctx, "iterations", d_total_i);
> - json_attr_double (&json_ctx, "max", max / d_iters);
> - json_attr_double (&json_ctx, "min", min / d_iters);
> - json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
> + if (is_bench)
> + {
> + json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
> + }
Redundant braces around a single statement.
> + else
> + {
> + json_attr_double (&json_ctx, "max", max / d_iters);
> + json_attr_double (&json_ctx, "min", min / d_iters);
> + json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
> + }
>
> - if (detailed)
> + if (detailed && !is_bench)
> {
> json_array_begin (&json_ctx, "timings");
>
>