This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH v2] Use HP_TIMING for benchmarks if available
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Mon, 13 May 2013 13:08:51 +0530
- Subject: [PATCH v2] Use HP_TIMING for benchmarks if available
- References: <20130415081936 dot GN9444 at spoyarek dot pnq dot redhat dot com> <20130422050122 dot GC1412 at spoyarek dot pnq dot redhat dot com>
Hi,
Here's an updated patch to use HP_TIMING to measure performance in
benchmarks. clock_gettime is still kept as a fallback when
!HP_TIMING_AVAIL. Additionally, I've also added support to override
HP_TIMING to use clock_gettime by executing:
make USE_CLOCK_GETTIME=1 bench
One would need a 'make bench-clean' to ensure that the sources are
rebuilt whenever one needs to switch between clock_gettime and
HP_TIMING.
Siddhesh
* benchtests/Makefile (CPPFLAGS-nonlib): Add
-DUSE_CLOCK_GETTIME if USE_CLOCK_GETTIME is defined.
(bench-deps): Add bench-timing.h.
* benchtests/bench-skeleton.c: Include bench-timing.h.
(main): Use TIMING_* macros instead of clock_gettime.
* benchtests/bench-timing.h: New file.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 19e1be6..9cc42b4 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -86,13 +86,17 @@ endif
CPPFLAGS-nonlib = -DDURATION=$(BENCH_DURATION)
+ifdef USE_CLOCK_GETTIME
+CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
+endif
+
# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
cpp-srcs-left := $(binaries-bench:=.c)
lib := nonlib
include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
-bench-deps := bench-skeleton.c Makefile
+bench-deps := bench-skeleton.c bench-timing.h Makefile
run-bench = $(test-wrapper-env) \
GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 404900b..f6af846 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <time.h>
#include <inttypes.h>
+#include "bench-timing.h"
volatile unsigned int dontoptimize = 0;
@@ -45,7 +46,8 @@ int
main (int argc, char **argv)
{
unsigned long i, k;
- struct timespec start, end, runtime;
+ struct timespec runtime;
+ timing_t start, end;
startup();
@@ -53,13 +55,9 @@ main (int argc, char **argv)
memset (&start, 0, sizeof (start));
memset (&end, 0, sizeof (end));
- clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+ unsigned long iters;
- /* Measure 1000 times the resolution of the clock. So for a 1ns resolution
- clock, we measure 1000 iterations of the function call at a time.
- Measurements close to the minimum clock resolution won't make much sense,
- but it's better than having nothing at all. */
- unsigned long iters = 1000 * start.tv_nsec;
+ TIMING_INIT (iters);
for (int v = 0; v < NUM_VARIANTS; v++)
{
@@ -68,19 +66,18 @@ main (int argc, char **argv)
runtime.tv_sec += DURATION;
double d_total_i = 0;
- uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+ timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
while (1)
{
for (i = 0; i < NUM_SAMPLES (v); i++)
{
- clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+ uint64_t cur;
+ TIMING_NOW (start);
for (k = 0; k < iters; k++)
BENCH_FUNC (v, i);
- clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+ TIMING_NOW (end);
- uint64_t cur = (end.tv_nsec - start.tv_nsec
- + ((end.tv_sec - start.tv_sec)
- * (uint64_t) 1000000000));
+ TIMING_DIFF (cur, start, end);
if (cur > max)
max = cur;
@@ -88,7 +85,7 @@ main (int argc, char **argv)
if (cur < min)
min = cur;
- total += cur;
+ TIMING_ACCUM (total, cur);
d_total_i += iters;
}
@@ -104,13 +101,11 @@ main (int argc, char **argv)
double d_iters;
done:
- d_total_s = total * 1e-9;
+ d_total_s = total;
d_iters = iters;
- printf ("%s: ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
- VARIANT (v),
- d_total_i, d_total_s, max / d_iters, min / d_iters,
- d_total_i / d_total_s);
+ TIMING_PRINT_STATS (VARIANT (v), d_total_s, d_iters, d_total_i, max,
+ min);
}
return 0;
diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
new file mode 100644
index 0000000..264d4b8
--- /dev/null
+++ b/benchtests/bench-timing.h
@@ -0,0 +1,72 @@
+/* Define timing macros.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <hp-timing.h>
+#include <stdint.h>
+
+#if HP_TIMING_AVAIL && !defined USE_CLOCK_GETTIME
+# define GL(x) _##x /* GL (name) expands to the local symbol _name.  */
+# define GLRO(x) _##x /* GLRO (name) likewise expands to _name.  */
+hp_timing_t _dl_hp_timing_overhead; /* Presumably the object <hp-timing.h> reaches through GL/GLRO (dl_hp_timing_overhead) -- confirm there.  */
+typedef hp_timing_t timing_t; /* In this branch timestamps are hp_timing_t values.  */
+
+# define TIMING_INIT(iters) \
+({ \
+ HP_TIMING_DIFF_INIT(); /* From <hp-timing.h>; presumably calibrates the timer overhead -- confirm there.  */ \
+ (iters) = 1000; /* This branch always uses 1000 iterations per sample.  */ \
+})
+
+# define TIMING_NOW(var) HP_TIMING_NOW (var) /* Forwards to HP_TIMING_NOW from <hp-timing.h>.  */
+# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end)) /* Forwards to HP_TIMING_DIFF.  */
+# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff)) /* Forwards to HP_TIMING_ACCUM_NT.  */
+
+# define TIMING_PRINT_STATS(func, d_total_s, d_iters, d_total_i, max, min) \
+ printf ("%s: ITERS:%g: TOTAL:%gMcy, MAX:%gcy, MIN:%gcy, %g calls/Mcy\n", \
+ (func), (d_total_i), (d_total_s) * 1e-6, (max) / (d_iters), \
+ (min) / (d_iters), 1e6 * (d_total_i) / (d_total_s)) /* Prints the totals in cycle units.  No trailing ';': the caller supplies it, as with the clock_gettime variant.  */
+
+#else
+typedef uint64_t timing_t;
+
+/* Fallback when HP_TIMING is unavailable or USE_CLOCK_GETTIME is defined:
+ time 1000 calls per nanosecond of CLOCK_PROCESS_CPUTIME_ID resolution, so
+ a 1ns clock gives 1000 iterations per sample. Samples close to the clock
+ resolution are not very meaningful, but are better than nothing. */
+# define TIMING_INIT(iters) \
+({ \
+ struct timespec res; /* Not named 'start', so the caller's own 'start' is not shadowed.  */ \
+ clock_getres (CLOCK_PROCESS_CPUTIME_ID, &res); \
+ (iters) = 1000 * res.tv_nsec; \
+})
+
+# define TIMING_NOW(var) \
+({ \
+ struct timespec tv; \
+ clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv); /* The return value is not checked.  */ \
+ (var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec); /* Seconds and nanoseconds flattened into one nanosecond count.  */ \
+})
+
+# define TIMING_DIFF(diff, start, end) ((diff) = (end) - (start)) /* Stores END - START in DIFF; fully parenthesized so it is a single expression.  */
+# define TIMING_ACCUM(sum, diff) ((sum) += (diff)) /* Adds DIFF to SUM; fully parenthesized likewise.  */
+
+# define TIMING_PRINT_STATS(func, d_total_s, d_iters, d_total_i, max, min) \
+ printf ("%s: ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n", \
+ (func), (d_total_i), (d_total_s) * 1e-9, (max) / (d_iters), \
+ (min) / (d_iters), 1e9 * (d_total_i) / (d_total_s)) /* D_TOTAL_S arrives in nanoseconds and is scaled by 1e-9 / 1e9 for the seconds-based fields.  */
+
+#endif