qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] linux-user: add option to intercept execve() syscalls
@ 2016-01-27  8:49 Petros Angelatos
  2016-02-10 19:03 ` Laurent Vivier
  0 siblings, 1 reply; 2+ messages in thread
From: Petros Angelatos @ 2016-01-27  8:49 UTC (permalink / raw)
  To: qemu-devel
  Cc: lucas.kaldstrom, peter.maydell, riku.voipio, laurent, Petros Angelatos

From: Petros Angelatos <petrosagg@resin.io>

In order for one to use QEMU user mode emulation under a chroot, it is
required to use binfmt_misc. This can be avoided by QEMU never doing a
raw execve() to the host system.

Introduce a new option, -execve, that uses the current QEMU interpreter
to intercept execve().

qemu_execve() will prepend the interpreter path, similar to what
binfmt_misc would do, and then pass the modified execve() to the host.

It is necessary to parse hashbang scripts in that function; otherwise
the kernel will try to run the interpreter of a script without QEMU and
get an invalid exec format error.

Signed-off-by: Petros Angelatos <petrosagg@resin.io>
---
 linux-user/main.c    |  36 ++++++++++++++++
 linux-user/qemu.h    |   1 +
 linux-user/syscall.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index ee12035..751eafa 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -22,6 +22,7 @@
 #include <string.h>
 #include <errno.h>
 #include <unistd.h>
+#include <sys/auxv.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
@@ -79,6 +80,7 @@ static void usage(int exitcode);
 
 static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
 const char *qemu_uname_release;
+const char *qemu_execve_path;
 
 /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
    we allocate a bigger stack. Need a better solution, for example
@@ -3828,6 +3830,38 @@ static void handle_arg_guest_base(const char *arg)
     have_guest_base = 1;
 }
 
+static void handle_arg_execve(const char *arg)
+{
+    const char *execfn;
+    char buf[PATH_MAX];
+    char *ret;
+    int len;
+
+    /* try getauxval() */
+    execfn = (const char *) getauxval(AT_EXECFN);
+
+    if (execfn != 0) {
+        ret = realpath(execfn, buf);
+
+        if (ret != NULL) {
+            qemu_execve_path = strdup(buf);
+            return;
+        }
+    }
+
+    /* try /proc/self/exe */
+    len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+
+    if (len != -1) {
+        buf[len] = '\0';
+        qemu_execve_path = strdup(buf);
+        return;
+    }
+
+    fprintf(stderr, "qemu_execve: unable to determine intepreter's path\n");
+    exit(EXIT_FAILURE);
+}
+
 static void handle_arg_reserved_va(const char *arg)
 {
     char *p;
@@ -3913,6 +3947,8 @@ static const struct qemu_argument arg_table[] = {
      "uname",      "set qemu uname release string to 'uname'"},
     {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
      "address",    "set guest_base address to 'address'"},
+    {"execve",     "QEMU_EXECVE",      false, handle_arg_execve,
+     "",           "use this interpreter when a process calls execve()"},
     {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
      "size",       "reserve 'size' bytes for guest virtual address space"},
     {"d",          "QEMU_LOG",         true,  handle_arg_log,
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index bd90cc3..0d9b058 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -140,6 +140,7 @@ void init_task_state(TaskState *ts);
 void task_settid(TaskState *);
 void stop_all_tasks(void);
 extern const char *qemu_uname_release;
+extern const char *qemu_execve_path;
 extern unsigned long mmap_min_addr;
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 0cbace4..4755978 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <linux/route.h>
 #include <linux/filter.h>
 #include <linux/blkpg.h>
+#include <linux/binfmts.h>
 #include "linux_loop.h"
 #include "uname.h"
 
@@ -5854,6 +5855,118 @@ static target_timer_t get_timer_id(abi_long arg)
     return timerid;
 }
 
+/* qemu_execve() Must return target values and target errnos. */
+static abi_long qemu_execve(char *filename, char *argv[],
+                  char *envp[])
+{
+    char *i_arg = NULL, *i_name = NULL;
+    char **new_argp;
+    int argc, fd, ret, i, offset = 3;
+    char *cp;
+    char buf[BINPRM_BUF_SIZE];
+
+    /* normal execve case */
+    if (qemu_execve_path == NULL || *qemu_execve_path == 0) {
+        return get_errno(execve(filename, argv, envp));
+    }
+
+    for (argc = 0; argv[argc] != NULL; argc++) {
+        /* nothing */ ;
+    }
+
+    fd = open(filename, O_RDONLY);
+    if (fd == -1) {
+        return get_errno(fd);
+    }
+
+    ret = read(fd, buf, BINPRM_BUF_SIZE);
+    if (ret == -1) {
+        close(fd);
+        return get_errno(ret);
+    }
+
+    /* if we have less than 2 bytes, we can guess it is not executable */
+    if (ret < 2) {
+        close(fd);
+        return -host_to_target_errno(ENOEXEC);
+    }
+
+    close(fd);
+
+    /* adapted from the kernel
+     * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c
+     */
+    if ((buf[0] == '#') && (buf[1] == '!')) {
+        /*
+         * This section does the #! interpretation.
+         * Sorta complicated, but hopefully it will work.  -TYT
+         */
+
+        buf[BINPRM_BUF_SIZE - 1] = '\0';
+        cp = strchr(buf, '\n');
+        if (cp == NULL) {
+            cp = buf + BINPRM_BUF_SIZE - 1;
+        }
+        *cp = '\0';
+        while (cp > buf) {
+            cp--;
+            if ((*cp == ' ') || (*cp == '\t')) {
+                *cp = '\0';
+            } else {
+                break;
+            }
+        }
+        for (cp = buf + 2; (*cp == ' ') || (*cp == '\t'); cp++) {
+            /* nothing */ ;
+        }
+        if (*cp == '\0') {
+            return -ENOEXEC; /* No interpreter name found */
+        }
+        i_name = cp;
+        i_arg = NULL;
+        for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
+            /* nothing */ ;
+        }
+        while ((*cp == ' ') || (*cp == '\t')) {
+            *cp++ = '\0';
+        }
+        if (*cp) {
+            i_arg = cp;
+        }
+
+        if (i_arg) {
+            offset = 5;
+        } else {
+            offset = 4;
+        }
+    }
+
+    new_argp = alloca((argc + offset + 1) * sizeof(void *));
+
+    /* Copy the original arguments with offset */
+    for (i = 0; i < argc; i++) {
+        new_argp[i + offset] = argv[i];
+    }
+
+    new_argp[0] = strdup(qemu_execve_path);
+    new_argp[1] = strdup("-0");
+    new_argp[offset] = filename;
+    new_argp[argc + offset] = NULL;
+
+    if (i_name) {
+        new_argp[2] = i_name;
+        new_argp[3] = i_name;
+
+        if (i_arg) {
+            new_argp[4] = i_arg;
+        }
+    } else {
+        new_argp[2] = argv[0];
+    }
+
+    return get_errno(execve(qemu_execve_path, new_argp, envp));
+}
+
 /* do_syscall() should always have a single exit point at the end so
    that actions, such as logging of syscall results, can be performed.
    All errnos that do_syscall() returns must be -TARGET_<errcode>. */
@@ -6113,7 +6226,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 
             if (!(p = lock_user_string(arg1)))
                 goto execve_efault;
-            ret = get_errno(execve(p, argp, envp));
+
+            ret = qemu_execve(p, argp, envp);
+
             unlock_user(p, arg1, 0);
 
             goto execve_end;
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [Qemu-devel] [PATCH v2] linux-user: add option to intercept execve() syscalls
  2016-01-27  8:49 [Qemu-devel] [PATCH v2] linux-user: add option to intercept execve() syscalls Petros Angelatos
@ 2016-02-10 19:03 ` Laurent Vivier
  0 siblings, 0 replies; 2+ messages in thread
From: Laurent Vivier @ 2016-02-10 19:03 UTC (permalink / raw)
  To: Petros Angelatos, qemu-devel; +Cc: lucas.kaldstrom, peter.maydell, riku.voipio



Le 27/01/2016 09:49, Petros Angelatos a écrit :
> From: Petros Angelatos <petrosagg@resin.io>
> 
> In order for one to use QEMU user mode emulation under a chroot, it is
> required to use binfmt_misc. This can be avoided by QEMU never doing a
> raw execve() to the host system.
> 
> Introduce a new option, -execve, that uses the current QEMU interpreter
> to intercept execve().
> 
> qemu_execve() will prepend the interpreter path, similar to what
> binfmt_misc would do, and then pass the modified execve() to the host.
> 
> It is necessary to parse hashbang scripts in that function; otherwise
> the kernel will try to run the interpreter of a script without QEMU and
> get an invalid exec format error.
> 
> Signed-off-by: Petros Angelatos <petrosagg@resin.io>
> ---

You can list the differences between v1 and v2 here.

>  linux-user/main.c    |  36 ++++++++++++++++
>  linux-user/qemu.h    |   1 +
>  linux-user/syscall.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 153 insertions(+), 1 deletion(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index ee12035..751eafa 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -22,6 +22,7 @@
>  #include <string.h>
>  #include <errno.h>
>  #include <unistd.h>
> +#include <sys/auxv.h>

This line needs to be rebased.

Otherwise:

Tested-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>

>  #include <sys/mman.h>
>  #include <sys/syscall.h>
>  #include <sys/resource.h>
> @@ -79,6 +80,7 @@ static void usage(int exitcode);
>  
>  static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
>  const char *qemu_uname_release;
> +const char *qemu_execve_path;
>  
>  /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
>     we allocate a bigger stack. Need a better solution, for example
> @@ -3828,6 +3830,38 @@ static void handle_arg_guest_base(const char *arg)
>      have_guest_base = 1;
>  }
>  
> +static void handle_arg_execve(const char *arg)
> +{
> +    const char *execfn;
> +    char buf[PATH_MAX];
> +    char *ret;
> +    int len;
> +
> +    /* try getauxval() */
> +    execfn = (const char *) getauxval(AT_EXECFN);
> +
> +    if (execfn != 0) {
> +        ret = realpath(execfn, buf);
> +
> +        if (ret != NULL) {
> +            qemu_execve_path = strdup(buf);
> +            return;
> +        }
> +    }
> +
> +    /* try /proc/self/exe */
> +    len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
> +
> +    if (len != -1) {
> +        buf[len] = '\0';
> +        qemu_execve_path = strdup(buf);
> +        return;
> +    }
> +
> +    fprintf(stderr, "qemu_execve: unable to determine intepreter's path\n");
> +    exit(EXIT_FAILURE);
> +}
> +
>  static void handle_arg_reserved_va(const char *arg)
>  {
>      char *p;
> @@ -3913,6 +3947,8 @@ static const struct qemu_argument arg_table[] = {
>       "uname",      "set qemu uname release string to 'uname'"},
>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
>       "address",    "set guest_base address to 'address'"},
> +    {"execve",     "QEMU_EXECVE",      false, handle_arg_execve,
> +     "",           "use this interpreter when a process calls execve()"},
>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
>       "size",       "reserve 'size' bytes for guest virtual address space"},
>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index bd90cc3..0d9b058 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -140,6 +140,7 @@ void init_task_state(TaskState *ts);
>  void task_settid(TaskState *);
>  void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
> +extern const char *qemu_execve_path;
>  extern unsigned long mmap_min_addr;
>  
>  /* ??? See if we can avoid exposing so much of the loader internals.  */
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 0cbace4..4755978 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>  #include <linux/route.h>
>  #include <linux/filter.h>
>  #include <linux/blkpg.h>
> +#include <linux/binfmts.h>
>  #include "linux_loop.h"
>  #include "uname.h"
>  
> @@ -5854,6 +5855,118 @@ static target_timer_t get_timer_id(abi_long arg)
>      return timerid;
>  }
>  
> +/* qemu_execve() Must return target values and target errnos. */
> +static abi_long qemu_execve(char *filename, char *argv[],
> +                  char *envp[])
> +{
> +    char *i_arg = NULL, *i_name = NULL;
> +    char **new_argp;
> +    int argc, fd, ret, i, offset = 3;
> +    char *cp;
> +    char buf[BINPRM_BUF_SIZE];
> +
> +    /* normal execve case */
> +    if (qemu_execve_path == NULL || *qemu_execve_path == 0) {
> +        return get_errno(execve(filename, argv, envp));
> +    }
> +
> +    for (argc = 0; argv[argc] != NULL; argc++) {
> +        /* nothing */ ;
> +    }
> +
> +    fd = open(filename, O_RDONLY);
> +    if (fd == -1) {
> +        return get_errno(fd);
> +    }
> +
> +    ret = read(fd, buf, BINPRM_BUF_SIZE);
> +    if (ret == -1) {
> +        close(fd);
> +        return get_errno(ret);
> +    }
> +
> +    /* if we have less than 2 bytes, we can guess it is not executable */
> +    if (ret < 2) {
> +        close(fd);
> +        return -host_to_target_errno(ENOEXEC);
> +    }
> +
> +    close(fd);
> +
> +    /* adapted from the kernel
> +     * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c
> +     */
> +    if ((buf[0] == '#') && (buf[1] == '!')) {
> +        /*
> +         * This section does the #! interpretation.
> +         * Sorta complicated, but hopefully it will work.  -TYT
> +         */
> +
> +        buf[BINPRM_BUF_SIZE - 1] = '\0';
> +        cp = strchr(buf, '\n');
> +        if (cp == NULL) {
> +            cp = buf + BINPRM_BUF_SIZE - 1;
> +        }
> +        *cp = '\0';
> +        while (cp > buf) {
> +            cp--;
> +            if ((*cp == ' ') || (*cp == '\t')) {
> +                *cp = '\0';
> +            } else {
> +                break;
> +            }
> +        }
> +        for (cp = buf + 2; (*cp == ' ') || (*cp == '\t'); cp++) {
> +            /* nothing */ ;
> +        }
> +        if (*cp == '\0') {
> +            return -ENOEXEC; /* No interpreter name found */
> +        }
> +        i_name = cp;
> +        i_arg = NULL;
> +        for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
> +            /* nothing */ ;
> +        }
> +        while ((*cp == ' ') || (*cp == '\t')) {
> +            *cp++ = '\0';
> +        }
> +        if (*cp) {
> +            i_arg = cp;
> +        }
> +
> +        if (i_arg) {
> +            offset = 5;
> +        } else {
> +            offset = 4;
> +        }
> +    }
> +
> +    new_argp = alloca((argc + offset + 1) * sizeof(void *));
> +
> +    /* Copy the original arguments with offset */
> +    for (i = 0; i < argc; i++) {
> +        new_argp[i + offset] = argv[i];
> +    }
> +
> +    new_argp[0] = strdup(qemu_execve_path);
> +    new_argp[1] = strdup("-0");
> +    new_argp[offset] = filename;
> +    new_argp[argc + offset] = NULL;
> +
> +    if (i_name) {
> +        new_argp[2] = i_name;
> +        new_argp[3] = i_name;
> +
> +        if (i_arg) {
> +            new_argp[4] = i_arg;
> +        }
> +    } else {
> +        new_argp[2] = argv[0];
> +    }
> +
> +    return get_errno(execve(qemu_execve_path, new_argp, envp));
> +}
> +
>  /* do_syscall() should always have a single exit point at the end so
>     that actions, such as logging of syscall results, can be performed.
>     All errnos that do_syscall() returns must be -TARGET_<errcode>. */
> @@ -6113,7 +6226,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>  
>              if (!(p = lock_user_string(arg1)))
>                  goto execve_efault;
> -            ret = get_errno(execve(p, argp, envp));
> +
> +            ret = qemu_execve(p, argp, envp);
> +
>              unlock_user(p, arg1, 0);
>  
>              goto execve_end;
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-02-10 19:03 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-27  8:49 [Qemu-devel] [PATCH v2] linux-user: add option to intercept execve() syscalls Petros Angelatos
2016-02-10 19:03 ` Laurent Vivier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).