This is the mail archive of the
libc-help@sourceware.org
mailing list for the glibc project.
sscanf ~1000 slower that strtol
- From: Michael Williamson <michael dot h dot williamson at gmail dot com>
- To: libc-help at sourceware dot org
- Date: Wed, 1 Jun 2016 10:50:16 -0700
- Subject: sscanf ~1000 slower that strtol
- Authentication-results: sourceware.org; auth=none
Hi,
I have a question. I was comparing the speed of reading some ASCII
integers by different methods, and I found that using sscanf (with the
"characters consumed" conversion, %n) is ~1000 times
slower than the fastest method, strtol. Why is it so slow?
I am using version ldd (Ubuntu EGLIBC 2.15-0ubuntu10.12) 2.15.
Thanks,
-Mike
/*
* rs.cpp
*
* Compare performance of reading a string of numbers by different methods.
*
* Compile this program with
*
* # g++ rs.cpp -O3 -lrt -o rs
*/
//#include <limits>
// #include <tuple>
#include <iostream>
//#include <cassert>
//#include <random>
#include <cstdio>
//#include <algorithm>
//#include <string>
//#include <vector>
//#include <chrono>
#include <fstream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <inttypes.h>
#include <unistd.h>
char fname[] = "data.txt";
/*------------------------------------------------- dtime
 * Elapsed time from *t0 to *t1, expressed in seconds.
 *
 * t0  start timestamp
 * t1  end timestamp
 *
 * Returns (t1 - t0) as a double; the result is negative when t1 is
 * earlier than t0.
 */
double dtime(struct timespec *t0, struct timespec *t1)
{
    /* Difference the two fields separately, then scale the nanosecond
     * part down to seconds before combining. */
    double whole_seconds = t1->tv_sec - t0->tv_sec;
    double frac_seconds = 1e-9 * (t1->tv_nsec - t0->tv_nsec);

    return whole_seconds + frac_seconds;
}
/*------------------------------------------------- main
 * Benchmark driver.
 *
 * Writes n_int signed decimal integers (one per line) to the file named
 * by fname, loads the file into a NUL-terminated heap buffer, and then
 * times five ways of parsing the numbers back:
 *   1. sscanf("%d%n") over the buffer
 *   2. strtol over the buffer
 *   3. fscanf("%d") on the file
 *   4. fgets + atoi on the file
 *   5. std::ifstream operator>> on the file
 * Each section prints the count of values read, the low 16 bits of their
 * sum, and the elapsed time in seconds.
 *
 * Command line:
 *   argv[1]  n_exe (parsed for compatibility, otherwise unused)
 *   argv[2]  n_int, number of integers to generate (default 50000)
 *
 * Returns 0 on success, -1 if the file cannot be created, measured,
 * buffered or read back.
 */
int main(int argc, char *argv[])
{
    int n_int, n_exe;
    FILE *fp;
    int i, j, n_read;
    unsigned int sum;           /* unsigned so that wrap-around is well defined */
    size_t len, n_got, pos;
    char *s, *p, *q, s80[80];
    long off0;
    struct timespec tsp0, tsp1;
    double d0;

    /*
     * parse the command line parameters; an argument that does not parse
     * leaves the default in place
     */
    n_exe = 1;                  /* not used */
    n_int = 50000;
    if (argc > 1 && sscanf(argv[1], "%d", &n_exe) != 1)
        n_exe = 1;
    if (argc > 2 && sscanf(argv[2], "%d", &n_int) != 1)
        n_int = 50000;
    (void) n_exe;

    /*
     * open a file and fill it with random numbers
     */
    fp = fopen(fname, "w+");
    if (fp == NULL) {
        fprintf(stderr, "fopen failed\n");
        return -1;
    }
    for (i = 0; i < n_int; i++)
        fprintf(fp, "%+d\n", rand() - RAND_MAX / 2);

    /*
     * allocate space and store the file in a buffer
     */
    off0 = ftell(fp);
    if (off0 < 0) {
        fprintf(stderr, "ftell failed\n");
        fclose(fp);
        unlink(fname);
        return -1;
    }
    len = (size_t) off0;

    /* One extra byte for the terminating NUL: sscanf and strtol below
     * treat the buffer as a C string and would otherwise read past its
     * end.  It also keeps the request non-zero when no numbers were
     * written. */
    s = (char *) malloc(len + 1);
    if (s == NULL) {
        fprintf(stderr, "malloc failed\n");
        fclose(fp);
        unlink(fname);
        return -1;
    }

    /* rewind() also separates the preceding writes from the reads that
     * follow on this update-mode stream. */
    rewind(fp);
    n_got = fread(s, 1, len, fp);
    if (n_got != len) {
        fprintf(stderr, "fread failed\n");
        free(s);
        fclose(fp);
        unlink(fname);
        return -1;
    }
    s[len] = '\0';

    /*
     * read the numbers with scanf from buffer
     *
     * NOTE(review): every call restarts at s + pos with the whole
     * remaining buffer as its input string; if the C library measures
     * that string on each call, this loop is quadratic in the buffer
     * size -- worth confirming against the library in use.
     */
    printf("sscanf from buffer\n");
    clock_gettime(CLOCK_MONOTONIC, &tsp0);
    n_read = 0;
    sum = 0;
    pos = 0;
    while (sscanf(s + pos, "%d%n", &i, &j) == 1) {
        n_read++;
        sum += (unsigned int) i;        /* wraps modulo 2^N by design */
        pos += (size_t) j;              /* %n: characters consumed by this call */
    }
    clock_gettime(CLOCK_MONOTONIC, &tsp1);
    d0 = dtime(&tsp0, &tsp1);
    printf("n_read = %d, sum = %d, time = %.5lf\n",
           n_read, (int) (sum & 0xFFFFu), d0);

    /*
     * read the numbers with strtol from buffer
     */
    printf("strtol from buffer\n");
    clock_gettime(CLOCK_MONOTONIC, &tsp0);
    n_read = 0;
    sum = 0;
    p = s;
    for (;;) {
        i = (int) strtol(p, &q, 10);
        if (q == p)                     /* no digits consumed: end of data */
            break;
        p = q;
        n_read++;
        sum += (unsigned int) i;        /* wraps modulo 2^N by design */
    }
    clock_gettime(CLOCK_MONOTONIC, &tsp1);
    d0 = dtime(&tsp0, &tsp1);
    printf("n_read = %d, sum = %d, time = %.5lf\n",
           n_read, (int) (sum & 0xFFFFu), d0);

    /*
     * read the numbers with fscanf from file
     */
    printf("fscanf from file\n");
    rewind(fp);
    clock_gettime(CLOCK_MONOTONIC, &tsp0);
    n_read = 0;
    sum = 0;
    while (fscanf(fp, "%d", &i) == 1) {
        n_read++;
        sum += (unsigned int) i;        /* wraps modulo 2^N by design */
    }
    clock_gettime(CLOCK_MONOTONIC, &tsp1);
    d0 = dtime(&tsp0, &tsp1);
    printf("n_read = %d, sum = %d, time = %.5lf\n",
           n_read, (int) (sum & 0xFFFFu), d0);

    /*
     * read the numbers with fgets from file
     * (atoi is kept on purpose: it is the method being timed here)
     */
    printf("fgets from file\n");
    rewind(fp);
    clock_gettime(CLOCK_MONOTONIC, &tsp0);
    n_read = 0;
    sum = 0;
    while (fgets(s80, sizeof(s80), fp) != NULL) {
        i = atoi(s80);
        n_read++;
        sum += (unsigned int) i;        /* wraps modulo 2^N by design */
    }
    clock_gettime(CLOCK_MONOTONIC, &tsp1);
    d0 = dtime(&tsp0, &tsp1);
    printf("n_read = %d, sum = %d, time = %.5lf\n",
           n_read, (int) (sum & 0xFFFFu), d0);

    /*
     * read the numbers with C++ stream
     */
    printf("C++ stream from file\n");
    std::ifstream infile(fname);
    std::ifstream::sync_with_stdio(false);
    clock_gettime(CLOCK_MONOTONIC, &tsp0);
    n_read = 0;
    sum = 0;
    while (infile >> i) {
        ++n_read;
        sum += (unsigned int) i;        /* wraps modulo 2^N by design */
    }
    clock_gettime(CLOCK_MONOTONIC, &tsp1);
    d0 = dtime(&tsp0, &tsp1);
    printf("n_read = %d, sum = %d, time = %.5lf\n",
           n_read, (int) (sum & 0xFFFFu), d0);

    /*
     * release the buffer and remove the temporary data file
     */
    free(s);
    fclose(fp);
    unlink(fname);
    return 0;
}