From: Renzo Davoli <renzo@cs.unibo.it>
To: libc-alpha@sourceware.org
Subject: [PATCH] New feature proposal: pureglibc
Date: Tue, 22 Aug 2017 14:49:00 -0000 [thread overview]
Message-ID: <20170822144917.soo7pded2hyfxe36@cs.unibo.it> (raw)
glibc, like many standard C library implementations, can be seen as the union of two libraries:
* the actual C library which provides high level services, using the system calls
of the underlying kernel,
* the interface library to the kernel, which dispatches the system call
to the kernel and retrieves the results.
A pure C library is a library providing only the former item.
It is useful to have a pure C library when a process needs to use self-virtualization.
In view-os, for example, using self-virtualization I can run modules like umfuseext2
further virtualizing the system calls generated by the ext2fs and glibc libraries.
In this way the file system image can be something generated on-the-fly instead of a
file as requested by the ext2fs design.
This is just an example. Many other applications can be found using coding creativity.
I am currently using a tricky and partial implementation of purelibc as an
overlay shared library redefining some glibc functions.
This is clearly a workaround.
https://sourceforge.net/p/view-os/code/HEAD/tree/trunk/purelibc/
http://wiki.v2.cs.unibo.it/wiki/index.php?title=PureLibc
The patch here attached is a draft implementation of a "pureglibc":
a global variable makes it possible to divert all the system calls generated
by glibc to a process-provided function. In this way glibc becomes a 'pure' C library
when this variable is non-NULL: system calls can be traced and virtualized.
The patch currently implements pureglibc for the x86_64 architecture only.
I am posting this as a proposal for a new feature, asking for comments and
for alternative (effective) ways to implement the same feature.
Thank you.
renzo
Here below: two "hello world" examples, one for tracing, one for self-virtualization, and the patch.
Example #1:
System Call tracing:
------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/* Address of the hook variable exported under the name "pure_syscall".
   main() obtains it with dlsym() and stores mysyscall through it; it stays
   NULL when the symbol is not found.  */
long (**pure_ptr)(long number, long nr, ...);
/* Write one line describing a system call to file descriptor 1, in the form
   "syscall <number> - <arg0> <arg1> ...\n".

   number  system call number, printed in decimal.
   nr      how many entries of args to print.
   args    argument values; each is printed as 16 zero-padded hex digits.

   The line is assembled in a fixed 256-byte buffer; anything that would not
   fit is dropped rather than written past the end of the buffer.  */
void printsyscall(long number, long nr, long *args) {
	char buf[256];
	size_t len = 0;
	long i;
	int n;

	/* number and args[] are long, so the conversions need the 'l' length
	   modifier; plain %d / %x would only be correct for int.  */
	n = snprintf(buf, sizeof buf, "syscall %ld -", number);
	if (n > 0)
		len = (size_t) n;
	for (i = 0; i < nr && len < sizeof buf - 1; i++) {
		n = snprintf(buf + len, sizeof buf - len, " %016lx",
			     (unsigned long) args[i]);
		if (n < 0)
			break;
		len += (size_t) n;
	}
	/* snprintf reports the length it wanted, which can exceed what fit;
	   clamp so that the trailing newline always lands inside buf.  */
	if (len > sizeof buf - 1)
		len = sizeof buf - 1;
	buf[len++] = '\n';
	/* NOTE(review): the output is sent with syscall() rather than write() --
	   presumably so that it does not pass through a glibc wrapper that the
	   patch would divert back into the tracing hook; confirm.  */
	syscall(__NR_write, 1, buf, len);
}
/* Number of argument slots always forwarded to syscall().  */
#define NSYSARG 6

/* Tracing hook: log the call with printsyscall(), then perform it.

   number  system call number.
   nr      how many variadic arguments follow; each is read as a long.
   Returns whatever syscall() returns for the forwarded call.

   Slots beyond nr are passed as 0.

   NOTE(review): syscall() reports failure as -1 with errno set, while the
   patch feeds the hook's return value to INTERNAL_SYSCALL_ERROR_P /
   INTERNAL_SYSCALL_ERRNO -- confirm whether the hook should return the
   negated errno value instead.  */
long mysyscall(long number, long nr, ...) {
	long params[NSYSARG] = { 0 };
	long count = nr < NSYSARG ? nr : NSYSARG;
	va_list list;
	long k;

	va_start(list, nr);
	for (k = 0; k < count; k++)
		params[k] = va_arg(list, long);
	va_end(list);

	printsyscall(number, nr, params);
	return syscall(number, params[0], params[1], params[2],
		       params[3], params[4], params[5]);
}
/* Tracing demo: look up the "pure_syscall" symbol; when it exists, announce
   it and store mysyscall through the returned address, then print
   "hello world".  */
int main() {
	void *hook = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = hook;
	if (hook != NULL) {
		/* The announcement is printed before the hook is installed.  */
		printf("PURE enabled glibc found\n");
		*pure_ptr = mysyscall;
	}
	printf("hello world\n");
}
-------
Example #2:
Virtualization (when this shared object is preloaded, "open" or "openat"
syscalls on /etc/passwd open /tmp/passwd instead.)
-------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/* Address of the hook variable exported under the name "pure_syscall".
   init() obtains it with dlsym() and stores mysyscall through it; it stays
   NULL when the symbol is not found.  */
long (**pure_ptr)(long number, long nr, ...);
/* Number of argument slots always forwarded to syscall().  */
#define NSYSARG 6

/* Nonzero when addr, read as a pointer to a C string, is non-NULL and the
   string equals "/etc/passwd".  */
static int is_etc_passwd(long addr) {
	const char *path = (const char *) addr;

	return path != NULL && strcmp(path, "/etc/passwd") == 0;
}

/* Virtualizing hook: forward the call with syscall(), but make open and
   openat of "/etc/passwd" open "/tmp/passwd" instead.

   number  system call number.
   nr      how many variadic arguments follow; each is read as a long.
   Returns whatever syscall() returns for the forwarded call.

   Slots beyond nr are passed as 0.  A NULL path is forwarded untouched so
   that the kernel, not this hook, deals with it.

   NOTE(review): syscall() reports failure as -1 with errno set, while the
   patch feeds the hook's return value to INTERNAL_SYSCALL_ERROR_P /
   INTERNAL_SYSCALL_ERRNO -- confirm whether the hook should return the
   negated errno value instead.  */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	va_list ap;
	int i;

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < nr ? va_arg(ap, long) : 0;
	va_end(ap);

	/* The path is compared in slot 0 for open and in slot 1 for openat.  */
	if (number == __NR_open && is_etc_passwd(arg[0]))
		arg[0] = (long) "/tmp/passwd";
	if (number == __NR_openat && is_etc_passwd(arg[1]))
		arg[1] = (long) "/tmp/passwd";

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}
/* Constructor: look up the "pure_syscall" symbol; when it exists, announce
   it and store mysyscall through the returned address.  Does nothing else
   when the symbol is missing.  */
__attribute__((constructor))
void init(void) {
	void *hook = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = hook;
	if (hook == NULL)
		return;
	/* The announcement is printed before the hook is installed.  */
	printf("PURE enabled glibc found\n");
	*pure_ptr = mysyscall;
}
--------
Here is the patch:
2017-08-21 Renzo Davoli <renzo@cs.unibo.it>
diff --git a/misc/Versions b/misc/Versions
index bfbda505e4..ddf3a2f887 100644
--- a/misc/Versions
+++ b/misc/Versions
@@ -156,7 +156,7 @@ libc {
gnu_dev_major; gnu_dev_minor; gnu_dev_makedev;
}
GLIBC_2.26 {
- preadv2; preadv64v2; pwritev2; pwritev64v2;
+ preadv2; preadv64v2; pwritev2; pwritev64v2; __pure_syscall; pure_syscall;
}
GLIBC_PRIVATE {
__madvise;
diff --git a/misc/init-misc.c b/misc/init-misc.c
index 02f2b0fa12..2e7bf13e69 100644
--- a/misc/init-misc.c
+++ b/misc/init-misc.c
@@ -24,6 +24,8 @@ char *__progname = (char *) "";
weak_alias (__progname_full, program_invocation_name)
weak_alias (__progname, program_invocation_short_name)
+long int (*__pure_syscall)(long name, long nr, ...) = NULL;
+weak_alias (__pure_syscall, pure_syscall)
void
__init_misc (int argc, char **argv, char **envp)
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index 880e496880..258f7b9e51 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -193,7 +193,7 @@
# undef INLINE_SYSCALL
# define INLINE_SYSCALL(name, nr, args...) \
({ \
- unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
+ unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, ##args); \
if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, ))) \
{ \
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, )); \
@@ -221,6 +221,8 @@
/* Registers clobbered by syscall. */
# define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
+extern long int (*__pure_syscall)(long name, long nr, ...);
+
# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
unsigned long int resultvar; \
@@ -233,7 +235,13 @@
(long int) resultvar; })
# undef INTERNAL_SYSCALL
# define INTERNAL_SYSCALL(name, err, nr, args...) \
- INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+ ({ \
+ long int resultvar; \
+ if (__glibc_unlikely (__pure_syscall != NULL)) \
+ resultvar = __pure_syscall(__NR_##name, nr, ##args); \
+ else \
+ resultvar = INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args); \
+ resultvar; })
# define INTERNAL_SYSCALL_NCS_TYPES(name, err, nr, args...) \
({ \
next reply other threads:[~2017-08-22 14:49 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-22 14:49 Renzo Davoli [this message]
2017-08-22 15:25 ` Joseph Myers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170822144917.soo7pded2hyfxe36@cs.unibo.it \
--to=renzo@cs.unibo.it \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).