qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, pbonzini@redhat.com, riku.voipio@iki.fi,
	laurent@vivier.eu, peter.maydell@linaro.org
Subject: Re: [PATCH 3/3] linux-user/i386: Emulate x86_64 vsyscalls
Date: Thu, 16 Jan 2020 16:26:22 +0000	[thread overview]
Message-ID: <87ftgfl64x.fsf@linaro.org> (raw)
In-Reply-To: <20200114210921.11216-4-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Notice the magic page during translate, much like we already
> do for the arm32 commpage.  At runtime, raise an exception to
> return cpu_loop for emulation.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/i386/cpu.h          |   1 +
>  linux-user/i386/cpu_loop.c | 104 +++++++++++++++++++++++++++++++++++++
>  target/i386/translate.c    |  16 +++++-
>  3 files changed, 120 insertions(+), 1 deletion(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 164d038d1f..3fb2d2a986 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1000,6 +1000,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
>  
>  #define EXCP_VMEXIT     0x100 /* only for system emulation */
>  #define EXCP_SYSCALL    0x101 /* only for user emulation */
> +#define EXCP_VSYSCALL   0x102 /* only for user emulation */
>  
>  /* i386-specific interrupt pending bits.  */
>  #define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
> diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
> index e217cca5ee..8b7c9f7337 100644
> --- a/linux-user/i386/cpu_loop.c
> +++ b/linux-user/i386/cpu_loop.c
> @@ -92,6 +92,105 @@ static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
>      queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
>  }
>  
> +#ifdef TARGET_X86_64
> +static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
> +{
> +    /*
> +     * For all the vsyscalls, NULL means "don't write anything" not
> +     * "write it at address 0".
> +     */
> +    if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) {
> +        return true;
> +    }
> +
> +    gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
> +    return false;
> +}
> +
> +/*
> + * Since v3.1, the kernel traps and emulates the vsyscall page.
> + * Entry points other than the official generate SIGSEGV.
> + */
> +static void emulate_vsyscall(CPUX86State *env)
> +{
> +    int syscall;
> +    abi_ulong ret;
> +    uint64_t caller;
> +
> +    /*
> +     * Validate the entry point.  We have already validated the page
> +     * during translation, now verify the offset.
> +     */
> +    switch (env->eip & ~TARGET_PAGE_MASK) {
> +    case 0x000:
> +        syscall = TARGET_NR_gettimeofday;
> +        break;
> +    case 0x400:
> +        syscall = TARGET_NR_time;
> +        break;
> +    case 0x800:
> +        syscall = TARGET_NR_getcpu;
> +        break;
> +    default:
> +    sigsegv:
> +        /* Like force_sig(SIGSEGV).  */
> +        gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
> +        return;
> +    }
> +
> +    /*
> +     * Validate the return address.
> +     * Note that the kernel treats this the same as an invalid entry point.
> +     */
> +    if (get_user_u64(caller, env->regs[R_ESP])) {
> +        goto sigsegv;
> +    }
> +
> +    /*
> +     * Validate the the pointer arguments.
> +     */
> +    switch (syscall) {
> +    case TARGET_NR_gettimeofday:
> +        if (!write_ok_or_segv(env, env->regs[R_EDI],
> +                              sizeof(struct target_timeval)) ||
> +            !write_ok_or_segv(env, env->regs[R_ESI],
> +                              sizeof(struct target_timezone))) {
> +            return;
> +        }
> +        break;
> +    case TARGET_NR_time:
> +        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
> +            return;
> +        }
> +        break;
> +    case TARGET_NR_getcpu:
> +        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
> +            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
> +            return;
> +        }
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /*
> +     * Perform the syscall.  None of the vsyscalls should need restarting,
> +     * and all faults should have been caught above.
> +     */
> +    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
> +                     env->regs[R_EDX], env->regs[10], env->regs[8],
> +                     env->regs[9], 0, 0);

How come the register ABI for this syscall is different from the others?
I can see why the syscall number doesn't come from EAX, but the other
registers are a different set from normal syscalls, which might be why:

> +    g_assert(ret != -TARGET_ERESTARTSYS);
> +    g_assert(ret != -TARGET_QEMU_ESIGRETURN);
> +    g_assert(ret != -TARGET_EFAULT);

I'm seeing an EFAULT on the gettimeofday failure:

   #0  do_syscall (cpu_env=cpu_env@entry=0x5555577d2b10, num=num@entry=96, arg1=0, arg2=0, arg3=4211016, arg4=8, arg5=274888677184, arg6=274886295415, arg7=0, arg8=0) at /home/alex/lsrc/qemu.git/linux-user/syscall.c:12076                                               
   #1  0x0000555555609b6e in emulate_vsyscall (env=0x5555577d2b10) at /home/alex/lsrc/qemu.git/linux-user/x86_64/../i386/cpu_loop.c:180
   #2  cpu_loop (env=0x5555577d2b10) at /home/alex/lsrc/qemu.git/linux-user/x86_64/../i386/cpu_loop.c:246                              
   #3  0x000055555559640e in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at /home/alex/lsrc/qemu.git/linux-user/main.c:865

arg1/arg2 don't seem right here.

> +    env->regs[R_EAX] = ret;
> +
> +    /* Emulate a ret instruction to leave the vsyscall page.  */
> +    env->eip = caller;
> +    env->regs[R_ESP] += 8;
> +}
> +#endif
> +
>  void cpu_loop(CPUX86State *env)
>  {
>      CPUState *cs = env_cpu(env);
> @@ -141,6 +240,11 @@ void cpu_loop(CPUX86State *env)
>                  env->regs[R_EAX] = ret;
>              }
>              break;
> +#endif
> +#ifdef TARGET_X86_64
> +        case EXCP_VSYSCALL:
> +            emulate_vsyscall(env);
> +            break;
>  #endif
>          case EXCP0B_NOSEG:
>          case EXCP0C_STACK:
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 7c99ef1385..391b4ef149 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -8555,7 +8555,21 @@ static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
>  static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
>  {
>      DisasContext *dc = container_of(dcbase, DisasContext, base);
> -    target_ulong pc_next = disas_insn(dc, cpu);
> +    target_ulong pc_next;
> +
> +#if defined(TARGET_X86_64) && \
> +    defined(CONFIG_USER_ONLY) && \
> +    defined(CONFIG_LINUX)
> +    /*
> +     * Detect entry into the vsyscall page and invoke the syscall.
> +     */
> +    if ((dc->base.pc_next & TARGET_PAGE_MASK) == 0xffffffffff600000ull) {
> +        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
> +        return;
> +    }
> +#endif
> +
> +    pc_next = disas_insn(dc, cpu);
>  
>      if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
>          /* if single step mode, we generate only one instruction and


-- 
Alex Bennée


  parent reply	other threads:[~2020-01-16 16:27 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-14 21:09 [PATCH 0/3] linux-user: Implement x86_64 vsyscalls Richard Henderson
2020-01-14 21:09 ` [PATCH 1/3] target/i386: Renumber EXCP_SYSCALL Richard Henderson
2020-01-15  7:22   ` Philippe Mathieu-Daudé
2020-01-15  9:55   ` Alex Bennée
2020-01-14 21:09 ` [PATCH 2/3] linux-user/i386: Split out gen_signal Richard Henderson
2020-01-15  7:22   ` Philippe Mathieu-Daudé
2020-01-15  9:58   ` Alex Bennée
2020-01-14 21:09 ` [PATCH 3/3] linux-user/i386: Emulate x86_64 vsyscalls Richard Henderson
2020-01-16 10:51   ` Alex Bennée
2020-01-16 16:26   ` Alex Bennée [this message]
2020-01-16 18:19     ` Richard Henderson
2020-01-16 18:22       ` Richard Henderson
2020-01-16 20:15       ` Alex Bennée
2020-01-14 23:35 ` [PATCH 0/3] linux-user: Implement " Paolo Bonzini
2020-01-15  7:01 ` Laurent Desnogues
2020-01-15 10:14 ` Laurent Vivier
2020-01-15 17:28   ` Richard Henderson
2020-01-16 14:05 ` Alex Bennée
2020-01-16 19:37   ` Richard Henderson
2020-01-16 14:30 ` Alex Bennée
2020-01-16 18:31   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87ftgfl64x.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=laurent@vivier.eu \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=riku.voipio@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).