This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] New feature proposal: pureglibc


glibc, like many standard C library implementations, can be seen as the union of two libraries:
* the actual C library which provides high level services, using the system calls
    of the underlying kernel,
* the interface library to the kernel, which dispatches the system call
    to the kernel and retrieves the results.

A pure C library is a library providing only the former item.

It is useful to have a pure C library when a process needs to use self-virtualization.

In view-os, for example, self-virtualization lets me run modules like umfuseext2
while further virtualizing the system calls generated by the ext2fs and glibc libraries.
In this way the file system image can be something generated on the fly instead of the
file required by the ext2fs design.
This is just an example. Many other applications can be found using coding creativity.

I am currently using a tricky and partial implementation of purelibc as an
overlay shared library redefining some glibc functions.
This is clearly a workaround.
https://sourceforge.net/p/view-os/code/HEAD/tree/trunk/purelibc/
http://wiki.v2.cs.unibo.it/wiki/index.php?title=PureLibc

The patch attached here is a draft implementation of a "pureglibc":
a global variable makes it possible to divert all the system calls generated
by glibc to a process-provided function. When this variable is non-NULL, glibc
becomes a 'pure' C library: system calls can be traced and virtualized.
The patch currently implements pureglibc for the x86_64 architecture only.

I am posting this as a proposal for a new feature, asking for comments and
for alternative (effective) ways to implement the same feature.

Thank you.

renzo

Below: two "hello world" examples, one for tracing and one for self-virtualization, followed by the patch.

Example #1:
System Call tracing:

------
#define _GNU_SOURCE         
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>

/* Address of the hook variable exported by libc, obtained at run time by
   looking up the "pure_syscall" symbol; NULL when the symbol is not found. */
long (**pure_ptr)(long number, long nr, ...);

/*
 * Format one system call as "syscall <number> - <arg> <arg> ...\n", with each
 * argument shown as 16 hexadecimal digits, and write the line to file
 * descriptor 1.
 *
 * number  system call number
 * nr      number of entries of args[] to print
 * args    argument values
 *
 * Arguments that would not fit in the internal 256-byte buffer are dropped
 * rather than written past its end.
 */
void printsyscall(long number, long nr, long *args) {
	char buf[256];
	size_t used = 0;
	long i;
	int n;

	/* number is a long, so the conversion needs the 'l' length modifier. */
	n = snprintf(buf, sizeof buf, "syscall %ld -", number);
	if (n > 0)
		used = (size_t) n;

	for (i = 0; i < nr; i++) {
		/* " %016lx" takes 17 bytes; stop while there is still room
		   for it plus the final newline. */
		if (used + 17 + 1 > sizeof buf)
			break;
		n = snprintf(buf + used, sizeof buf - used, " %016lx",
			     (unsigned long) args[i]);
		if (n > 0)
			used += (size_t) n;
	}
	/* The length is passed explicitly, so no terminating NUL is needed. */
	buf[used++] = '\n';

	/* NOTE(review): presumably syscall() is used instead of write() so that
	   the trace output does not itself go through the hook -- confirm. */
	syscall(__NR_write, 1, buf, used);
}

#define NSYSARG 6
/*
 * Tracing hook: collect up to NSYSARG variadic arguments, log the call with
 * printsyscall() and then issue the system call through syscall().
 *
 * number  system call number
 * nr      number of arguments that follow
 * returns the value returned by syscall()
 *
 * NOTE(review): syscall() signals failure by returning -1 and setting errno,
 * while a raw kernel return value would be -errno; verify which convention
 * the caller of the hook expects.
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	long count;
	va_list ap;
	int i;

	/* arg[] has NSYSARG slots: never report more entries than that to
	   printsyscall(), and treat a negative count as "no arguments". */
	if (nr < 0)
		count = 0;
	else if (nr > NSYSARG)
		count = NSYSARG;
	else
		count = nr;

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < count ? va_arg(ap, long) : 0;
	va_end(ap);

	printsyscall(number, count, arg);

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}

/* Look up the libc hook variable; when it exists, announce it and install
   mysyscall as the hook. Then print the greeting. */
int main() {
	void *slot = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = slot;
	if (pure_ptr != NULL) {
		/* Announce first, install second: same order as before. */
		printf("PURE enabled glibc found\n");
		*pure_ptr = mysyscall;
	}

	printf("hello world\n");
	return 0;
}
-------

Example #2:
Virtualization (when this shared object is preloaded, "open" or "openat"
		syscalls on /etc/passwd open /tmp/passwd instead).
-------
#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Address of the hook variable exported by libc, obtained at load time by
   looking up the "pure_syscall" symbol; NULL when the symbol is not found. */
long (**pure_ptr)(long number, long nr, ...);

#define NSYSARG 6
/*
 * Virtualizing hook: forward a system call through syscall(), except that
 * "open" and "openat" calls whose path is exactly "/etc/passwd" are pointed
 * at "/tmp/passwd" instead.
 *
 * number  system call number
 * nr      number of arguments that follow (at most NSYSARG are read)
 * returns the system call result; on failure the negated errno value, which
 *         is the form the patched INLINE_SYSCALL decodes into errno
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	va_list ap;
	long ret;
	int i;

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < nr ? va_arg(ap, long) : 0;
	va_end(ap);

	/* A NULL path must reach the kernel untouched (it answers EFAULT);
	   strcmp() on it would crash the process instead. */
	if (number == __NR_open && arg[0] != 0
	    && strcmp((char *) arg[0], "/etc/passwd") == 0)
		arg[0] = (long) "/tmp/passwd";
	if (number == __NR_openat && arg[1] != 0
	    && strcmp((char *) arg[1], "/etc/passwd") == 0)
		arg[1] = (long) "/tmp/passwd";

	ret = syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);

	/* syscall() reports failure as -1 with the cause in errno; hand the
	   cause back as -errno so that the caller does not see every failure
	   as error number 1. */
	if (ret == -1)
		return -errno;
	return ret;
}

/* Constructor: look up the libc hook variable and, when it exists, announce
   it and install mysyscall as the hook. */
__attribute__((constructor))
void init(void) {
	void *slot = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = slot;
	if (pure_ptr == NULL)
		return;

	/* Announce first, install second: same order as before. */
	printf("PURE enabled glibc found\n");
	*pure_ptr = mysyscall;
}
--------

Here is the patch:

2018-08-21 Renzo Davoli <renzo@cs.unibo.it>

diff --git a/misc/Versions b/misc/Versions
index bfbda505e4..ddf3a2f887 100644
--- a/misc/Versions
+++ b/misc/Versions
@@ -156,7 +156,7 @@ libc {
     gnu_dev_major; gnu_dev_minor; gnu_dev_makedev;
   }
   GLIBC_2.26 {
-    preadv2; preadv64v2; pwritev2; pwritev64v2;
+    preadv2; preadv64v2; pwritev2; pwritev64v2; __pure_syscall; pure_syscall;
   }
   GLIBC_PRIVATE {
     __madvise;
diff --git a/misc/init-misc.c b/misc/init-misc.c
index 02f2b0fa12..2e7bf13e69 100644
--- a/misc/init-misc.c
+++ b/misc/init-misc.c
@@ -24,6 +24,8 @@ char *__progname = (char *) "";
 weak_alias (__progname_full, program_invocation_name)
 weak_alias (__progname, program_invocation_short_name)
 
+long int (*__pure_syscall)(long name, long nr, ...) = NULL;
+weak_alias (__pure_syscall, pure_syscall)
 
 void
 __init_misc (int argc, char **argv, char **envp)
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index 880e496880..258f7b9e51 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -193,7 +193,7 @@
 # undef INLINE_SYSCALL
 # define INLINE_SYSCALL(name, nr, args...) \
   ({									      \
-    unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args);	      \
+    unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, ##args);	      \
     if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, )))	      \
       {									      \
 	__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, ));		      \
@@ -221,6 +221,8 @@
 /* Registers clobbered by syscall.  */
 # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
 
+extern long int (*__pure_syscall)(long name, long nr, ...);
+
 # define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
   ({									      \
     unsigned long int resultvar;					      \
@@ -233,7 +235,13 @@
     (long int) resultvar; })
 # undef INTERNAL_SYSCALL
 # define INTERNAL_SYSCALL(name, err, nr, args...) \
-  INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+	({ \
+	 long int resultvar;                \
+	 if (__glibc_unlikely (__pure_syscall != NULL)) \
+	 resultvar = __pure_syscall(__NR_##name, nr, ##args); \
+	 else \
+	 resultvar = INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args); \
+	 resultvar; })
 
 # define INTERNAL_SYSCALL_NCS_TYPES(name, err, nr, args...) \
   ({									      \



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]