This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Use HP_TIMING for benchmarks if available


Ping!

On Mon, Apr 15, 2013 at 01:49:36PM +0530, Siddhesh Poyarekar wrote:
> Hi,
> 
> Here's a patch that adds support for using HP_TIMING in benchmark
> measurements when it is available.  clock_gettime is still kept as a
> fallback when !HP_TIMING_AVAIL.  Additionally, I've also added support
> to override HP_TIMING to use clock_gettime by executing:
> 
> make USE_CLOCK_GETTIME=1 bench
> 
> One would need a 'make bench-clean' to ensure that the sources are
> rebuilt whenever one needs to switch between clock_gettime and
> HP_TIMING.  Another easy way is to just touch benchtests/Makefile.
> 
> I have verified that the measurements on x86_64 are consistent over
> multiple runs, so my concern of scheduler overhead causing jitters was
> unfounded.  Jitter due to high system load is unavoidable even with
> clock_gettime since (as Rich Felker pointed out in an earlier
> discussion) competition for cache will still affect the performance
> numbers.
> 
> Siddhesh
> 
> 	* Makeconfig (cflags): Add bench-cflags.
> 	* benchtests/Makefile: Define bench-cflags if
> 	USE_CLOCK_GETTIME is defined.
> 	* benchtests/bench-skeleton.c: Include bench-timing.h.
> 	(main): Use TIMING_* macros instead of clock_gettime.
> 	* benchtests/bench-timing.h: New file.
> 
> diff --git a/Makeconfig b/Makeconfig
> index a3d3e70..a83485f 100644
> --- a/Makeconfig
> +++ b/Makeconfig
> @@ -755,6 +755,9 @@ ifeq	"$(strip $(+cflags))" ""
>  +cflags	:= $(default_cflags)
>  endif	# $(+cflags) == ""
>  
> +# Add common benchmark CFLAGS
> ++cflags += $(bench-cflags)
> +
>  +cflags += $(cflags-cpu) $(+gccwarn) $(+merge-constants) $(+math-flags)
>  +gcc-nowarn := -w
>  
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 3e794d7..d330abb 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -109,12 +109,16 @@ LDFLAGS-bench-slowatan = -lm
>  # Rules to build and execute the benchmarks.  Do not put any benchmark
>  # parameters beyond this point.
>  
> +ifdef USE_CLOCK_GETTIME
> +bench-cflags := -DUSE_CLOCK_GETTIME
> +endif
> +
>  include ../Makeconfig
>  include ../Rules
>  
>  binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
>  
> -bench-deps := bench-skeleton.c Makefile
> +bench-deps := bench-skeleton.c bench-timing.h Makefile
>  
>  run-bench = $(test-wrapper-env) \
>  	    GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
> index 13f986d..99a316e 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -17,59 +17,53 @@
>     <http://www.gnu.org/licenses/>.  */
>  
>  #include <string.h>
> -#include <stdint.h>
>  #include <stdio.h>
>  #include <time.h>
>  #include <inttypes.h>
> +#include "bench-timing.h"
>  
>  int
>  main (int argc, char **argv)
>  {
>    unsigned long i, j, k;
> -  uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
> -  struct timespec start, end;
> +  timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
> +  timing_t start, end;
>  
>    memset (&start, 0, sizeof (start));
>    memset (&end, 0, sizeof (end));
>  
> -  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
> +  unsigned long iters;
> +
> +  TIMING_INIT (iters);
>  
> -  /* Measure 1000 times the resolution of the clock.  So for a 1ns resolution
> -     clock, we measure 1000 iterations of the function call at a time.
> -     Measurements close to the minimum clock resolution won't make much sense,
> -     but it's better than having nothing at all.  */
> -  unsigned long iters = 1000 * start.tv_nsec;
>    unsigned long total_iters = ITER / iters;
>  
>    for (i = 0; i < NUM_SAMPLES; i++)
>      {
>        for (j = 0; j < total_iters; j ++)
>  	{
> -	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
> +	  int64_t cur;
> +
> +	  TIMING_NOW (start);
>  	  for (k = 0; k < iters; k++)
>  	    BENCH_FUNC(i);
> -	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
> -
> -	  uint64_t cur = (end.tv_nsec - start.tv_nsec
> -			 + ((end.tv_sec - start.tv_sec)
> -			    * (uint64_t) 1000000000));
> +	  TIMING_NOW (end);
>  
> +	  TIMING_DIFF (cur, start, end);
>  	  if (cur > max)
>  	    max = cur;
>  
>  	  if (cur < min)
>  	    min = cur;
>  
> -	  total += cur;
> +	  TIMING_ACCUM (total, cur);
>  	}
>      }
>  
> -  double d_total_s = total * 1e-9;
> +  double d_total_s = total;
>    double d_iters = iters;
>    double d_total_i = (double)ITER * NUM_SAMPLES;
> -  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
> -	  d_total_i, d_total_s, max / d_iters, min / d_iters,
> -	  d_total_i / d_total_s);
> +  TIMING_PRINT_STATS (d_total_s, d_iters, d_total_i, max, min);
>  
>    return 0;
>  }
> diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
> new file mode 100644
> index 0000000..e67a88d
> --- /dev/null
> +++ b/benchtests/bench-timing.h
> @@ -0,0 +1,72 @@
> +/* Define timing macros.
> +   Copyright (C) 2013 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <hp-timing.h>
> +#include <stdint.h>
> +
> +#if HP_TIMING_AVAIL && !defined USE_CLOCK_GETTIME
> +# define GL(x) _##x
> +# define GLRO(x) _##x
> +hp_timing_t _dl_hp_timing_overhead;
> +typedef hp_timing_t timing_t;
> +
> +# define TIMING_INIT(iters) \
> +({									      \
> +  HP_TIMING_DIFF_INIT();						      \
> +  (iters) = 1000;							      \
> +})
> +
> +# define TIMING_NOW(var) HP_TIMING_NOW (var)
> +# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end))
> +# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff))
> +
> +# define TIMING_PRINT_STATS(d_total_s, d_iters, d_total_i, max, min) \
> +  printf (FUNCNAME ": ITERS:%g: TOTAL:%gC, MAX:%gC, MIN:%gC, %g calls/MC\n",  \
> +	  (d_total_i), (d_total_s), (max) / (d_iters), (min) / (d_iters),     \
> +	  1e6 * (d_total_i) / (d_total_s));
> +
> +#else
> +typedef uint64_t timing_t;
> +
> +/* Measure 1000 times the resolution of the clock.  So for a 1ns
> +   resolution  clock, we measure 1000 iterations of the function call at a
> +   time.  Measurements close to the minimum clock resolution won't make
> +   much sense, but it's better than having nothing at all.  */
> +# define TIMING_INIT(iters) \
> +({									      \
> +  struct timespec start;						      \
> +  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);			      \
> +  (iters) = 1000 * start.tv_nsec;					      \
> +})
> +
> +# define TIMING_NOW(var) \
> +({									      \
> +  struct timespec tv;							      \
> +  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv);			      \
> +  (var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec);	      \
> +})
> +
> +# define TIMING_DIFF(diff, start, end) (diff) = (end) - (start)
> +# define TIMING_ACCUM(sum, diff) (sum) += (diff)
> +
> +# define TIMING_PRINT_STATS(d_total_s, d_iters, d_total_i, max, min) \
> +  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gs, MIN:%gs, %g iter/s\n",    \
> +	  (d_total_i), (d_total_s) * 1e9, (max) / (d_iters),		      \
> +	  (min) / (d_iters), (d_total_i) / ((d_total_s) * 1e9))
> +
> +#endif


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]