Re: [PATCH v3 06/19] RISC-V: KVM: Implement VCPU interrupts and requests handling

From: Anup Patel <anup@brainfault.org>
To: Anup Patel <Anup.Patel@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>,
	Daniel Lezcano <daniel.lezcano@linaro.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	Radim K <rkrcmar@redhat.com>, Palmer Dabbelt <palmer@sifive.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Christoph Hellwig <hch@infradead.org>,
	Atish Patra <Atish.Patra@wdc.com>,
	Alistair Francis <Alistair.Francis@wdc.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	"linux-riscv@lists.infradead.org"
	<linux-riscv@lists.infradead.org>
Subject: Re: [PATCH v3 06/19] RISC-V: KVM: Implement VCPU interrupts and requests handling
Date: Tue, 6 Aug 2019 08:48:27 +0530	[thread overview]
Message-ID: <CAAhSdy0mcxWJ=-vKYnKR+MfqDL-Mdc8Bg+=qpLp40Kb0zGwmQA@mail.gmail.com> (raw)
In-Reply-To: <20190805134201.2814-7-anup.patel@wdc.com>

On Mon, Aug 5, 2019 at 7:13 PM Anup Patel <Anup.Patel@wdc.com> wrote:
>
> This patch implements VCPU interrupts and requests which are both
> asynchronous events.
>
> The VCPU interrupts can be set/unset using KVM_INTERRUPT ioctl from
> user-space. In future, the in-kernel IRQCHIP emulation will use
> kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt()
> functions to set/unset VCPU interrupts.
>
> Important VCPU requests implemented by this patch are:
> KVM_REQ_SLEEP       - set whenever VCPU itself goes to sleep state
> KVM_REQ_VCPU_RESET  - set whenever VCPU reset is requested
>
> The WFI trap-n-emulate (added later) will use KVM_REQ_SLEEP request
> and kvm_riscv_vcpu_has_interrupt() function.
>
> The KVM_REQ_VCPU_RESET request will be used by SBI emulation (added
> later) to power-up a VCPU in power-off state. The user-space can use
> the GET_MPSTATE/SET_MPSTATE ioctls to get/set power state of a VCPU.
>
> Signed-off-by: Anup Patel <anup.patel@wdc.com>
> ---
>  arch/riscv/include/asm/kvm_host.h |  23 ++++
>  arch/riscv/include/uapi/asm/kvm.h |   3 +
>  arch/riscv/kvm/main.c             |   2 +
>  arch/riscv/kvm/vcpu.c             | 169 +++++++++++++++++++++++++++---
>  4 files changed, 184 insertions(+), 13 deletions(-)
>
> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> index dab32c9c3470..04804f14f760 100644
> --- a/arch/riscv/include/asm/kvm_host.h
> +++ b/arch/riscv/include/asm/kvm_host.h
> @@ -122,6 +122,21 @@ struct kvm_vcpu_arch {
>         /* CPU CSR context upon Guest VCPU reset */
>         struct kvm_vcpu_csr guest_reset_csr;
>
> +       /*
> +        * VCPU interrupts
> +        *
> +        * We have a lockless approach for tracking pending VCPU interrupts
> +        * implemented using atomic bitops. The irqs_pending bitmap represent
> +        * pending interrupts whereas irqs_pending_mask represent bits changed
> +        * in irqs_pending. Our approach is modeled around multiple producer
> +        * and single consumer problem where the consumer is the VCPU itself.
> +        */
> +       unsigned long irqs_pending;
> +       unsigned long irqs_pending_mask;
> +
> +       /* VCPU power-off state */
> +       bool power_off;
> +
>         /* Don't run the VCPU (blocked) */
>         bool pause;
>
> @@ -146,4 +161,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>
>  static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
>
> +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
> +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
> +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
> +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
> +
>  #endif /* __RISCV_KVM_HOST_H__ */
> diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> index d15875818b6e..6dbc056d58ba 100644
> --- a/arch/riscv/include/uapi/asm/kvm.h
> +++ b/arch/riscv/include/uapi/asm/kvm.h
> @@ -18,6 +18,9 @@
>
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>
> +#define KVM_INTERRUPT_SET      -1U
> +#define KVM_INTERRUPT_UNSET    -2U
> +
>  /* for KVM_GET_REGS and KVM_SET_REGS */
>  struct kvm_regs {
>  };
> diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
> index a26a68df7cfc..f4a7a3c67f8e 100644
> --- a/arch/riscv/kvm/main.c
> +++ b/arch/riscv/kvm/main.c
> @@ -48,6 +48,8 @@ int kvm_arch_hardware_enable(void)
>         hideleg |= SIE_SEIE;
>         csr_write(CSR_HIDELEG, hideleg);
>
> +       csr_write(CSR_VSIP, 0);
> +
>         return 0;
>  }
>
> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> index ff08d138f7c3..455b0f40832b 100644
> --- a/arch/riscv/kvm/vcpu.c
> +++ b/arch/riscv/kvm/vcpu.c
> @@ -40,6 +40,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>                                  RISCV_ISA_EXT_s | \
>                                  RISCV_ISA_EXT_u)
>
> +static DEFINE_PER_CPU(unsigned long, vsip_shadow);
> +

With the introduction of a compile-time percpu variable here, insmod
fails to insert KVM RISC-V as a loadable module. This looks like an
issue with arch/riscv/kernel/module.c.

I tried a run-time percpu variable using the alloc_percpu() API and it
works perfectly fine. I will make vsip_shadow a run-time percpu
variable instead of a compile-time one in the v4 series.

>  static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> @@ -50,6 +52,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>         memcpy(csr, reset_csr, sizeof(*csr));
>
>         memcpy(cntx, reset_cntx, sizeof(*cntx));
> +
> +       WRITE_ONCE(vcpu->arch.irqs_pending, 0);
> +       WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
>  }
>
>  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
> @@ -116,8 +121,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return READ_ONCE(vcpu->arch.irqs_pending) & (1UL << IRQ_S_TIMER);
>  }
>
>  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
> @@ -130,20 +134,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>
>  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return (kvm_riscv_vcpu_has_interrupt(vcpu) &&
> +               !vcpu->arch.power_off && !vcpu->arch.pause);
>  }
>
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
>  }
>
>  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return false;
> +       return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
>  }
>
>  bool kvm_arch_has_vcpu_debugfs(void)
> @@ -164,7 +166,21 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
>  long kvm_arch_vcpu_async_ioctl(struct file *filp,
>                                unsigned int ioctl, unsigned long arg)
>  {
> -       /* TODO; */
> +       struct kvm_vcpu *vcpu = filp->private_data;
> +       void __user *argp = (void __user *)arg;
> +
> +       if (ioctl == KVM_INTERRUPT) {
> +               struct kvm_interrupt irq;
> +
> +               if (copy_from_user(&irq, argp, sizeof(irq)))
> +                       return -EFAULT;
> +
> +               if (irq.irq == KVM_INTERRUPT_SET)
> +                       return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_EXT);
> +               else
> +                       return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_EXT);
> +       }
> +
>         return -ENOIOCTLCMD;
>  }
>
> @@ -213,18 +229,103 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>         return -EINVAL;
>  }
>
> +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> +       unsigned long mask, val;
> +
> +       if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
> +               mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
> +               val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
> +
> +               csr->vsip &= ~mask;
> +               csr->vsip |= val;
> +       }
> +}
> +
> +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.guest_csr.vsip = csr_read(CSR_VSIP);
> +       vcpu->arch.guest_csr.vsie = csr_read(CSR_VSIE);
> +}
> +
> +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
> +{
> +       if (irq != IRQ_S_SOFT &&
> +           irq != IRQ_S_TIMER &&
> +           irq != IRQ_S_EXT)
> +               return -EINVAL;
> +
> +       set_bit(irq, &vcpu->arch.irqs_pending);
> +       smp_mb__before_atomic();
> +       set_bit(irq, &vcpu->arch.irqs_pending_mask);
> +
> +       kvm_vcpu_kick(vcpu);
> +
> +       return 0;
> +}
> +
> +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
> +{
> +       if (irq != IRQ_S_SOFT &&
> +           irq != IRQ_S_TIMER &&
> +           irq != IRQ_S_EXT)
> +               return -EINVAL;
> +
> +       clear_bit(irq, &vcpu->arch.irqs_pending);
> +       smp_mb__before_atomic();
> +       set_bit(irq, &vcpu->arch.irqs_pending_mask);
> +
> +       return 0;
> +}
> +
> +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu)
> +{
> +       return (READ_ONCE(vcpu->arch.irqs_pending) &
> +               vcpu->arch.guest_csr.vsie) ? true : false;
> +}
> +
> +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.power_off = true;
> +       kvm_make_request(KVM_REQ_SLEEP, vcpu);
> +       kvm_vcpu_kick(vcpu);
> +}
> +
> +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.power_off = false;
> +       kvm_vcpu_wake_up(vcpu);
> +}
> +
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>                                     struct kvm_mp_state *mp_state)
>  {
> -       /* TODO: */
> +       if (vcpu->arch.power_off)
> +               mp_state->mp_state = KVM_MP_STATE_STOPPED;
> +       else
> +               mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
> +
>         return 0;
>  }
>
>  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>                                     struct kvm_mp_state *mp_state)
>  {
> -       /* TODO: */
> -       return 0;
> +       int ret = 0;
> +
> +       switch (mp_state->mp_state) {
> +       case KVM_MP_STATE_RUNNABLE:
> +               vcpu->arch.power_off = false;
> +               break;
> +       case KVM_MP_STATE_STOPPED:
> +               kvm_riscv_vcpu_power_off(vcpu);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       return ret;
>  }
>
>  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> @@ -248,7 +349,37 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>
>  static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> +       struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> +
> +       if (kvm_request_pending(vcpu)) {
> +               if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
> +                       swait_event_interruptible_exclusive(*wq,
> +                                               ((!vcpu->arch.power_off) &&
> +                                               (!vcpu->arch.pause)));
> +
> +                       if (vcpu->arch.power_off || vcpu->arch.pause) {
> +                               /*
> +                                * Awaken to handle a signal, request to
> +                                * sleep again later.
> +                                */
> +                               kvm_make_request(KVM_REQ_SLEEP, vcpu);
> +                       }
> +               }
> +
> +               if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
> +                       kvm_riscv_reset_vcpu(vcpu);
> +       }
> +}
> +
> +static void kvm_riscv_update_vsip(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> +       unsigned long *vsip = this_cpu_ptr(&vsip_shadow);
> +
> +       if (*vsip != csr->vsip) {
> +               csr_write(CSR_VSIP, csr->vsip);
> +               *vsip = csr->vsip;
> +       }
>  }
>
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> @@ -311,6 +442,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
>                 smp_mb__after_srcu_read_unlock();
>
> +               /*
> +                * We might have got VCPU interrupts updated asynchronously
> +                * so update it in HW.
> +                */
> +               kvm_riscv_vcpu_flush_interrupts(vcpu);
> +
> +               /* Update VSIP CSR for current CPU */
> +               kvm_riscv_update_vsip(vcpu);
> +
>                 if (ret <= 0 ||
>                     kvm_request_pending(vcpu)) {
>                         vcpu->mode = OUTSIDE_GUEST_MODE;
> @@ -334,6 +474,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>                 scause = csr_read(CSR_SCAUSE);
>                 stval = csr_read(CSR_STVAL);
>
> +               /* Syncup interrupts state with HW */
> +               kvm_riscv_vcpu_sync_interrupts(vcpu);
> +
>                 /*
>                  * We may have taken a host interrupt in VS/VU-mode (i.e.
>                  * while executing the guest). This interrupt is still
> --
> 2.17.1
>

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv