This is the mail archive of the libc-help@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

sscanf ~1000x slower than strtol


Hi,

I have a question. I was comparing the speed of reading some ASCII
integers by different methods, and I found that using sscanf (with the
characters-consumed conversion, %n) is ~1000 times
slower than the fastest method, strtol. Why is it so slow?

I am using version ldd (Ubuntu EGLIBC 2.15-0ubuntu10.12) 2.15.

Thanks,
-Mike
/*
 * rs.cpp
 *
 * Compare performance of reading a string of numbers by different methods.
 *
 * Compile this program with
 *
 *	# g++ rs.cpp -O3 -lrt -o rs
 */

//#include <limits>
// #include <tuple>
#include <iostream>
//#include <cassert>
//#include <random>
#include <cstdio>
//#include <algorithm>
//#include <string>
//#include <vector>
//#include <chrono>
#include <fstream>

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <inttypes.h>
#include <unistd.h>

// Name of the scratch file that main() writes, reads back and unlinks.
char fname[] = "data.txt";


/*------------------------------------------------- dtime
 * Elapsed time from *t0 to *t1, in seconds (negative when *t1 precedes *t0).
 *
 * t0  start timestamp
 * t1  end timestamp
 */
double dtime(struct timespec *t0, struct timespec *t1)
{
	// whole-second part of the difference
	double whole = t1->tv_sec - t0->tv_sec;
	// nanosecond part, scaled to seconds; may be negative
	double frac = 1e-9 * (t1->tv_nsec - t0->tv_nsec);

	return whole + frac;
}

/*------------------------------------------------- main
 * Benchmark several ways of parsing ASCII integers.
 *
 * Usage: rs [n_exe [n_int]]
 *	n_exe	parsed but not used
 *	n_int	number of random integers to generate (default 50000)
 *
 * Writes n_int signed random integers, one per line, to the file named by
 * fname, loads that file into a NUL-terminated memory buffer, then times
 * five readers: sscanf and strtol on the buffer, and fscanf, fgets + atoi
 * and a C++ ifstream on the file.  Each reader prints the count of numbers
 * read, the low 16 bits of their sum and the elapsed time in seconds.
 *
 * Returns 0 on success, -1 if the file cannot be created or loaded.
 */
int main(int argc, char *argv[])
{
	int n_int, n_exe;
	FILE *fp;
	int i, j, n, n_read;
	unsigned sum;			// unsigned so that wrap-around is well defined
	char *s, *p, *q, s80[80];
	long len;
	size_t n_got;
	struct timespec tsp0, tsp1;
	double d0;

	/*
	 * parse the command line parameters; an argument that does not parse
	 * leaves the default in place
	 */
	n_exe = 1;			// not used
	n_int = 50000;
	if (argc > 1) sscanf(argv[1], "%d", &n_exe);
	if (argc > 2) sscanf(argv[2], "%d", &n_int);

	/*
	 * open a file and fill it with random numbers
	 */
	fp = fopen(fname, "w+");
	if (fp == NULL) {
		fprintf(stderr, "fopen failed\n");
		return -1;
	}
	for (i = 0; i < n_int; i++)
		fprintf(fp, "%+d\n", rand() - RAND_MAX / 2);

	/*
	 * allocate space and store the file in a buffer; one extra byte holds
	 * the terminating NUL, because sscanf and strtol treat the buffer as a
	 * C string and would otherwise read past the end of the allocation
	 */
	len = ftell(fp);
	if (len < 0) {
		fprintf(stderr, "ftell failed\n");
		fclose(fp);
		unlink(fname);
		return -1;
	}
	s = (char *) malloc((size_t) len + 1);
	if (s == NULL) {
		fprintf(stderr, "malloc failed\n");
		fclose(fp);
		unlink(fname);
		return -1;
	}
	rewind(fp);
	n_got = fread(s, 1, (size_t) len, fp);
	if (n_got != (size_t) len) {
		fprintf(stderr, "fread failed\n");
		free(s);
		fclose(fp);
		unlink(fname);
		return -1;
	}
	s[len] = '\0';

	/*
	 * read the numbers with scanf from buffer
	 */
	printf("sscanf from buffer\n");
	clock_gettime(CLOCK_MONOTONIC, &tsp0);
	n_read = 0;
	sum = 0;
	n = 0;
	while (sscanf(s + n, "%d%n", &i, &j) == 1) {
		n_read++;
		sum += (unsigned) i;	// wraps modulo 2^N
		n += j;			// %n: characters consumed by this call
	}
	clock_gettime(CLOCK_MONOTONIC, &tsp1);
	d0 = dtime(&tsp0, &tsp1);
	printf("n_read = %d, sum = %d, time = %.5lf\n", n_read, (int) (sum & 0xFFFF), d0);

	/*
	 * read the numbers with strtol from buffer
	 */
	printf("strtol from buffer\n");
	clock_gettime(CLOCK_MONOTONIC, &tsp0);
	n_read = 0;
	sum = 0;
	p = s;
	for (;;) {
		i = (int) strtol(p, &q, 10);
		if (q == p) break;	// nothing converted: end of data
		p = q;
		n_read++;
		sum += (unsigned) i;	// wraps modulo 2^N
	}
	clock_gettime(CLOCK_MONOTONIC, &tsp1);
	d0 = dtime(&tsp0, &tsp1);
	printf("n_read = %d, sum = %d, time = %.5lf\n", n_read, (int) (sum & 0xFFFF), d0);

	/*
	 * read the numbers with fscanf from file
	 */
	printf("fscanf from file\n");
	rewind(fp);
	clock_gettime(CLOCK_MONOTONIC, &tsp0);
	n_read = 0;
	sum = 0;
	while (fscanf(fp, "%d", &i) == 1) {
		n_read++;
		sum += (unsigned) i;	// wraps modulo 2^N
	}
	clock_gettime(CLOCK_MONOTONIC, &tsp1);
	d0 = dtime(&tsp0, &tsp1);
	printf("n_read = %d, sum = %d, time = %.5lf\n", n_read, (int) (sum & 0xFFFF), d0);

	/*
	 * read the numbers with fgets from file
	 */
	printf("fgets from file\n");
	rewind(fp);
	clock_gettime(CLOCK_MONOTONIC, &tsp0);
	n_read = 0;
	sum = 0;
	while (fgets(s80, sizeof(s80), fp) != NULL) {
		i = atoi(s80);
		n_read++;
		sum += (unsigned) i;	// wraps modulo 2^N
	}
	clock_gettime(CLOCK_MONOTONIC, &tsp1);
	d0 = dtime(&tsp0, &tsp1);
	printf("n_read = %d, sum = %d, time = %.5lf\n", n_read, (int) (sum & 0xFFFF), d0);

	/*
	 * read the numbers with C++ stream
	 */
	printf("C++ stream from file\n");
	std::ifstream infile(fname);
	std::ifstream::sync_with_stdio(false);
	clock_gettime(CLOCK_MONOTONIC, &tsp0);
	n_read = 0;
	sum = 0;
	while (infile >> i) {
		++n_read;
		sum += (unsigned) i;	// wraps modulo 2^N
	}
	clock_gettime(CLOCK_MONOTONIC, &tsp1);
	d0 = dtime(&tsp0, &tsp1);
	printf("n_read = %d, sum = %d, time = %.5lf\n", n_read, (int) (sum & 0xFFFF), d0);

	free(s);
	fclose(fp);
	unlink(fname);
	return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]