public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Renzo Davoli <renzo@cs.unibo.it>
To: libc-alpha@sourceware.org
Subject: [PATCH] New feature proposal: pureglibc
Date: Tue, 22 Aug 2017 14:49:00 -0000	[thread overview]
Message-ID: <20170822144917.soo7pded2hyfxe36@cs.unibo.it> (raw)

glibc, like many standard C library implementations, can be seen as the union of two libraries:
* the actual C library which provides high level services, using the system calls
    of the underlying kernel,
* the interface library to the kernel, which dispatches the system call
    to the kernel and retrieves the results.

A pure C library is a library providing only the former item.

It is useful to have a pure C library when a process needs to use self-virtualization.

In view-os, for example, self-virtualization lets me run modules like umfuseext2
while further virtualizing the system calls generated by the ext2fs and glibc libraries.
In this way the file system image can be generated on the fly instead of being a
file, as the ext2fs design requires.
This is just an example. Many other applications can be found using coding creativity.

I am currently using a tricky and partial implementation of purelibc as an
overlay shared library redefining some glibc functions.
This is clearly a workaround.
https://sourceforge.net/p/view-os/code/HEAD/tree/trunk/purelibc/
http://wiki.v2.cs.unibo.it/wiki/index.php?title=PureLibc

The patch attached here is a draft implementation of a "pureglibc":
a global variable makes it possible to divert all the system calls generated
by glibc to a process-provided function. When this variable is non-NULL, glibc
becomes a 'pure' C library: system calls can be traced and virtualized.
The patch currently implements pureglibc for the x86_64 architecture only.

I am posting this as a proposal for a new feature, asking for comments and
for alternative (effective) ways to implement the same feature.

Thank you.

renzo

Below are two "hello world" examples (one for tracing, one for self-virtualization), followed by the patch.

Example #1:
System Call tracing:

------
#define _GNU_SOURCE         
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>

/* Address of the C library's "pure_syscall" hook variable, as returned by
 * dlsym(); NULL when the running library does not export that symbol. */
long (**pure_ptr)(long number, long nr, ...);

/* Write a one-line trace of a system call to standard output, in the form
 * "syscall <number> - <arg> <arg> ...\n".
 *   number: system call number.
 *   nr:     how many entries of args to print.
 *   args:   argument values, each printed as 16 hexadecimal digits.
 * The line is emitted with syscall(__NR_write, ...) on file descriptor 1. */
void printsyscall(long number, long nr, long *args) {
	char buf[256];
	size_t len = 0;
	long i;
	int n;

	/* number and args[] are long: the l length modifier makes the
	 * conversions match the argument types, so no bits are dropped. */
	n = snprintf(buf, sizeof buf, "syscall %ld -", number);
	if (n > 0)
		len = (size_t) n;
	/* Each argument takes 17 bytes (a space plus 16 hex digits); stop
	 * while there is still room for it and for the final newline. */
	for (i = 0; i < nr && len + 17 < sizeof buf; i++)
		len += (size_t) snprintf(buf + len, sizeof buf - len,
				" %016lx", (unsigned long) args[i]);
	buf[len++] = '\n';
	syscall(__NR_write, 1, buf, len);
}

#define NSYSARG 6
/* System call hook for the tracing example: log the call with
 * printsyscall(), then execute it through syscall() and return the result.
 *   number: system call number.
 *   nr:     count of variadic arguments; each one is read as a long.
 * NOTE(review): syscall() reports failure as -1 with errno set, while the
 * INLINE_SYSCALL macro in the patch below hands this return value to
 * INTERNAL_SYSCALL_ERROR_P/INTERNAL_SYSCALL_ERRNO -- confirm whether the
 * hook is expected to return a negative errno value instead. */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG] = { 0 };
	va_list ap;
	int slot;

	/* Only the first nr slots come from the caller; the rest stay 0. */
	va_start(ap, nr);
	for (slot = 0; slot < NSYSARG && slot < nr; slot++)
		arg[slot] = va_arg(ap, long);
	va_end(ap);

	printsyscall(number, nr, arg);

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}

/* Tracing demo: if the C library exports "pure_syscall", store mysyscall
 * through it, then print a greeting. */
int main() {
	void *hook = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = hook;
	if (pure_ptr != NULL) {
		printf("PURE enabled glibc found\n");
		*pure_ptr = mysyscall;
	}

	printf("hello world\n");
	return 0;
}
-------

Example #2:
Virtualization (when this shared object is preloaded,
		"open" or "openat" syscalls on /etc/passwd open /tmp/passwd instead.)
-------
#define _GNU_SOURCE         
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>

/* Address of the C library's "pure_syscall" hook variable, as returned by
 * dlsym(); NULL when the running library does not export that symbol. */
long (**pure_ptr)(long number, long nr, ...);

#define NSYSARG 6
/* System call hook for the virtualization example: "open" and "openat"
 * calls whose pathname is "/etc/passwd" are redirected to "/tmp/passwd";
 * every call is then executed through syscall() and its result returned.
 *   number: system call number.
 *   nr:     count of variadic arguments; each one is read as a long.
 * NOTE(review): syscall() reports failure as -1 with errno set, while the
 * INLINE_SYSCALL macro in the patch below hands this return value to
 * INTERNAL_SYSCALL_ERROR_P/INTERNAL_SYSCALL_ERRNO -- confirm whether the
 * hook is expected to return a negative errno value instead. */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	va_list ap;
	int path_idx = -1;	/* index of the pathname argument, if any */
	int i;

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < nr ? va_arg(ap, long) : 0;
	va_end(ap);

	if (number == __NR_open)
		path_idx = 0;
	else if (number == __NR_openat)
		path_idx = 1;

	/* A NULL pathname is passed on untouched so that the kernel rejects
	 * it, instead of crashing the process inside strcmp(). */
	if (path_idx >= 0 && arg[path_idx] != 0
			&& strcmp((const char *) arg[path_idx], "/etc/passwd") == 0)
		arg[path_idx] = (long) "/tmp/passwd";

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}

/* Constructor: if the C library exports "pure_syscall", announce it and
 * store mysyscall through the returned address; otherwise do nothing. */
__attribute__((constructor))
void init(void) {
	pure_ptr = dlsym(RTLD_DEFAULT, "pure_syscall");
	if (pure_ptr == NULL)
		return;

	printf("PURE enabled glibc found\n");
	*pure_ptr = mysyscall;
}
--------

Here is the patch:

2017-08-21 Renzo Davoli <renzo@cs.unibo.it>

diff --git a/misc/Versions b/misc/Versions
index bfbda505e4..ddf3a2f887 100644
--- a/misc/Versions
+++ b/misc/Versions
@@ -156,7 +156,7 @@ libc {
     gnu_dev_major; gnu_dev_minor; gnu_dev_makedev;
   }
   GLIBC_2.26 {
-    preadv2; preadv64v2; pwritev2; pwritev64v2;
+    preadv2; preadv64v2; pwritev2; pwritev64v2; __pure_syscall; pure_syscall;
   }
   GLIBC_PRIVATE {
     __madvise;
diff --git a/misc/init-misc.c b/misc/init-misc.c
index 02f2b0fa12..2e7bf13e69 100644
--- a/misc/init-misc.c
+++ b/misc/init-misc.c
@@ -24,6 +24,8 @@ char *__progname = (char *) "";
 weak_alias (__progname_full, program_invocation_name)
 weak_alias (__progname, program_invocation_short_name)
 
+long int (*__pure_syscall)(long name, long nr, ...) = NULL;
+weak_alias (__pure_syscall, pure_syscall)
 
 void
 __init_misc (int argc, char **argv, char **envp)
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index 880e496880..258f7b9e51 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -193,7 +193,7 @@
 # undef INLINE_SYSCALL
 # define INLINE_SYSCALL(name, nr, args...) \
   ({									      \
-    unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args);	      \
+    unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, ##args);	      \
     if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, )))	      \
       {									      \
 	__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, ));		      \
@@ -221,6 +221,8 @@
 /* Registers clobbered by syscall.  */
 # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
 
+extern long int (*__pure_syscall)(long name, long nr, ...);
+
 # define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
   ({									      \
     unsigned long int resultvar;					      \
@@ -233,7 +235,13 @@
     (long int) resultvar; })
 # undef INTERNAL_SYSCALL
 # define INTERNAL_SYSCALL(name, err, nr, args...) \
-  INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+	({ \
+	 long int resultvar;                \
+	 if (__glibc_unlikely (__pure_syscall != NULL)) \
+	 resultvar = __pure_syscall(__NR_##name, nr, ##args); \
+	 else \
+	 resultvar = INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args); \
+	 resultvar; })
 
 # define INTERNAL_SYSCALL_NCS_TYPES(name, err, nr, args...) \
   ({									      \


             reply	other threads:[~2017-08-22 14:49 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-22 14:49 Renzo Davoli [this message]
2017-08-22 15:25 ` Joseph Myers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170822144917.soo7pded2hyfxe36@cs.unibo.it \
    --to=renzo@cs.unibo.it \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).