Re: Missing <aio_misc.h> exported header ?
- From: Xavier Roche <xavier dot roche at algolia dot com>
- To: libc-help at sourceware dot org
- Date: Fri, 6 Sep 2019 13:25:57 +0200
- Subject: Re: Missing <aio_misc.h> exported header ?
- References: <CAE9vp3Lw5VN0K=U=FpmeGsqYUu+YoWUMZXttmJLak4K-eMr5fA@mail.gmail.com> <009acaaf-3d4b-9c4f-4591-29711b028d13@linaro.org>
Hi,
On Wed, Sep 4, 2019 at 9:28 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
> Because it is an internal-only implementation file. The Linux one, for
> instance, calls syscalls directly using internal macros (INTERNAL_SYSCALL)
> which are not meant to be exported.
My understanding is that on Linux the kernel version is bypassed and the
pure glibc version is used, is that right?
Could the glibc version expose this feature? The fsync call is
surprisingly missing from the lio_listio() flavor.
> The LIO_DSYNC/LIO_SYNC opcodes are used to call the internal __aio_enqueue_request;
> external usage should use aio_fsync. By "providing much better performance when
> syncing a lot of small files", which exact usage pattern are you referring to?
The attached program is a small benchmark that, despite its rather
basic nature, makes it possible to spot differences between several
strategies:
- sync every file with fsync or fdatasync
- use aio (lio_listio with LIO_SYNC/LIO_DSYNC)
- sync with a global syncfs or sync call
For example, with 10 files of 10 KB each:
g++ -Wall -Wextra -std=c++17 -O3 -g3 fsynctest.cpp -o fsynctest -lrt
./fsynctest 10000 10
I get the following consistent results with a standard consumer SSD:
AVERAGE fsync: 22ms
AVERAGE fdatasync: 21ms
AVERAGE parallel(fsync): 3ms
AVERAGE parallel(fdatasync): 3ms
AVERAGE syncfs: 5ms
AVERAGE sync: 5ms
The idea is that a single lio_listio() call performs better than several
separate aio_fsync() calls.
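For reference, here is a minimal sketch of the aio_fsync()-based
alternative mentioned above (not part of the attached benchmark; the
parallel_fsync() helper name is made up for illustration, and error
handling is elided): one request is enqueued per file, then each one is
waited on with aio_suspend()/aio_error().

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <vector>

// Hypothetical helper: 'fds' holds the already-written file descriptors.
static void parallel_fsync(const std::vector<int>& fds)
{
    std::vector<struct aiocb> cbs(fds.size()); // value-initialized (zeroed)
    for (size_t i = 0; i < fds.size(); i++) {
        cbs[i].aio_fildes = fds[i];
        cbs[i].aio_sigevent.sigev_notify = SIGEV_NONE;
        aio_fsync(O_SYNC, &cbs[i]); // O_DSYNC for fdatasync semantics
    }
    // Wait until every request has left the EINPROGRESS state.
    for (size_t i = 0; i < fds.size(); i++) {
        const struct aiocb* one[] = { &cbs[i] };
        while (aio_error(&cbs[i]) == EINPROGRESS)
            aio_suspend(one, 1, nullptr);
    }
}

The benchmark below compares this kind of per-file submission against a
single batched lio_listio() call.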
Regards,
--
Xavier Roche -
xavier.roche@algolia.com
0675167036
// fsynctest.cpp
// fsync of multiple files pattern test sample
// Xavier Roche, Algolia, 2019
// This file is covered by the MIT License
#include <iostream>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <aio.h>
#include <string.h>
// g++ -Wall -Wextra -std=c++17 -O3 -g3 fsynctest.cpp -o fsynctest -lrt
/* Missing <aio_misc.h> definition. */
#ifndef LIO_DSYNC
enum
{
    LIO_DSYNC = LIO_NOP + 1,
    LIO_SYNC,
    LIO_READ64 = LIO_READ | 128,
    LIO_WRITE64 = LIO_WRITE | 128
};
#endif
enum class SyncMode
{
    Fsync,
    Fdatasync,
    ParallelFsync,
    ParallelFdatasync,
    Syncfs,
    Sync,
    _MAX = Sync,
};
// RAII helper: invokes 'fun' on 'ref' when the wrapper goes out of scope.
template<typename T>
class Wrapped
{
public:
    Wrapped(T& ref, std::function<void(T&)> fun)
        : _ref(ref)
        , _fun(fun)
    {}
    ~Wrapped() { _fun(_ref); }

private:
    T& _ref;
    std::function<void(T&)> _fun;
};
struct File
{
    File(const std::string& file, int fd)
        : file(file)
        , fd(fd)
    {}
    std::string file;
    int fd;
};
#define BENCH_COUNT 10
int main(int argc, char** argv)
{
    if (argc != 3) {
        std::cout << "Usage: " << argv[0] << " <file_size> <nbfiles>"
                  << "\n";
        return 1;
    }
    const size_t size = std::stoi(argv[1]);
    const size_t nbfiles = std::stoi(argv[2]);
    assert(nbfiles > 0);
    long microseconds[static_cast<size_t>(SyncMode::_MAX) + 1] = {};
    for (size_t bench = 0; bench < BENCH_COUNT; bench++) {
        for (SyncMode mode = static_cast<SyncMode>(0);
             static_cast<size_t>(mode) <= static_cast<size_t>(SyncMode::_MAX);
             mode = static_cast<SyncMode>(static_cast<size_t>(mode) + 1)) {
            std::vector<char> data;
            data.resize(size);
            std::vector<File> files;
            // Close and remove the test files at the end of each iteration.
            const Wrapped<decltype(files)> wrap(files, (std::function<void(std::vector<File>&)>)[](auto& files) {
                for (const auto& file : files) {
                    close(file.fd);
                    unlink(file.file.c_str());
                }
            });
            const auto start = std::chrono::steady_clock::now();
            for (size_t i = 0; i < nbfiles; i++) {
                const std::string testfile = std::to_string(i);
                const int fd = open(testfile.c_str(),
                                    O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC,
                                    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
                assert(fd != -1);
                const ssize_t nw = write(fd, data.data(), data.size());
                assert(nw == static_cast<ssize_t>(data.size()));
                files.push_back(File(testfile, fd));
            }
            const char* smode;
            switch (mode) {
                case SyncMode::Fsync:
                    smode = "fsync";
                    for (const auto& file : files) {
                        if (fsync(file.fd) != 0) {
                            assert(!"fsync failed");
                        }
                    }
                    break;
                case SyncMode::Fdatasync:
                    smode = "fdatasync";
                    for (const auto& file : files) {
                        if (fdatasync(file.fd) != 0) {
                            assert(!"fdatasync failed");
                        }
                    }
                    break;
                case SyncMode::ParallelFsync:
                case SyncMode::ParallelFdatasync: {
                    const bool isFsync = mode == SyncMode::ParallelFsync;
                    smode = isFsync ? "parallel(fsync)" : "parallel(fdatasync)";
                    std::vector<struct aiocb> syncs;
                    syncs.resize(nbfiles);
                    std::vector<struct aiocb*> psyncs;
                    psyncs.resize(nbfiles);
                    for (size_t i = 0; i < nbfiles; i++) {
                        syncs[i].aio_fildes = files[i].fd;
                        syncs[i].aio_sigevent.sigev_notify = SIGEV_NONE;
                        syncs[i].aio_lio_opcode = isFsync ? LIO_SYNC : LIO_DSYNC;
                        psyncs[i] = &syncs[i];
                    }
                    // Wait for all requests to complete; lio_listio() returns -1
                    // with errno set to EIO if at least one request failed.
                    lio_listio(LIO_WAIT, psyncs.data(), psyncs.size(), nullptr);
                    for (const auto& sync : psyncs) {
                        const auto result = aio_error(sync);
                        if (result != 0) {
                            assert(!"aio_error reported a failed request");
                        }
                    }
                } break;
                case SyncMode::Syncfs:
                    smode = "syncfs";
                    if (syncfs(files[0].fd) != 0) {
                        assert(!"syncfs failed");
                    }
                    break;
                case SyncMode::Sync:
                    smode = "sync";
                    sync();
                    break;
                default:
                    assert(!"unhandled mode");
            }
            const auto end = std::chrono::steady_clock::now();
            const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
            microseconds[static_cast<size_t>(mode)] += elapsed.count();
            if (bench < BENCH_COUNT - 1) {
                std::cout << smode << ": " << (elapsed.count() / 1000) << "ms\n";
            } else {
                std::cout << "AVERAGE " << smode << ": "
                          << ((microseconds[static_cast<size_t>(mode)] / BENCH_COUNT) / 1000) << "ms\n";
            }
        }
        std::cout << "\n";
    }
    return 0;
}