All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Elena Afanasova <eafanasova@gmail.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, jag.raman@oracle.com,
	elena.ufimtseva@oracle.com
Subject: Re: [PATCH RFC] hw/misc/pc-testdev: add support for ioregionfd testing
Date: Mon, 1 Mar 2021 17:47:41 +0000	[thread overview]
Message-ID: <YD0ovavIuThCSS5J@stefanha-x1.localdomain> (raw)
In-Reply-To: <20210301131628.5211-1-eafanasova@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 8206 bytes --]

On Mon, Mar 01, 2021 at 04:16:28PM +0300, Elena Afanasova wrote:

Thanks for posting this piece of the ioregionfd testing infrastructure!

Please include a commit description even if it's just an RFC patch.
People on qemu-devel may not be aware of ioregionfd or the purpose of
this patch.

Something like:

  Add ioregionfd support to pc-testdev for use with kvm-unit-tests that
  exercise the new KVM ioregionfd API. ioregionfd is a new MMIO/PIO
  dispatch mechanism for KVM. The kernel patches implementing ioregionfd
  are available here:

    https://patchwork.kernel.org/project/kvm/list/?series=436083

  The kvm-unit-test will create one read FIFO and one write FIFO and
  then invoke QEMU like this:

    --device pc-testdev,addr=0x100000000,size=4096,rfifo=path/to/rfifo,wfifo=path/to/wfifo

  QEMU does not actually process the read FIFO or write FIFO. It simply
  registers an ioregionfd with KVM. From then on KVM will communicate
  directly with the kvm-unit-test via the read FIFO and write FIFO that
  were provided. This allows test cases to handle MMIO/PIO accesses.

Elena and I discussed the long-term QEMU API for ioregionfd on IRC.
Eventually this test device could use it instead of directly calling
kvm_vm_ioctl(). The new QEMU API would be
memory_region_set_aio_context(AioContext *aio_context).

When ioregionfd is available an ioregion would be registered with KVM
and AioContext will have a read fd to handle MMIO/PIO accesses for any
of its ioregions. This way device emulation can run outside the vcpu
thread.

When ioregionfd is not available QEMU can emulate it by writing a struct
ioregionfd_cmd to the write fd from the vcpu thread. The relevant
AioContext will handle the read fd as normal and dispatch the MMIO/PIO
access to the MemoryRegion.

> Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
> ---
>  hw/misc/pc-testdev.c      | 74 +++++++++++++++++++++++++++++++++++++++
>  include/sysemu/kvm.h      |  4 +--
>  linux-headers/linux/kvm.h | 24 +++++++++++++
>  3 files changed, 100 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
> index e389651869..38355923ca 100644
> --- a/hw/misc/pc-testdev.c
> +++ b/hw/misc/pc-testdev.c
> @@ -40,6 +40,9 @@
>  #include "hw/irq.h"
>  #include "hw/isa/isa.h"
>  #include "qom/object.h"
> +#include "sysemu/kvm.h"
> +#include <linux/kvm.h>
> +#include "hw/qdev-properties.h"
>  
>  #define IOMEM_LEN    0x10000
>  
> @@ -53,6 +56,15 @@ struct PCTestdev {
>      MemoryRegion iomem;
>      uint32_t ioport_data;
>      char iomem_buf[IOMEM_LEN];
> +
> +    uint64_t guest_paddr;
> +    uint64_t memory_size;
> +    char *read_fifo;
> +    char *write_fifo;
> +    bool posted_writes;
> +    bool pio;
> +    int rfd;
> +    int wfd;
>  };
>  
>  #define TYPE_TESTDEV "pc-testdev"
> @@ -169,6 +181,9 @@ static const MemoryRegionOps test_iomem_ops = {
>  
>  static void testdev_realizefn(DeviceState *d, Error **errp)
>  {
> +    struct kvm_ioregion ioreg;

Zero this so that user_data is always 0 instead of undefined data from
the stack?

> +    int flags = 0;
> +
>      ISADevice *isa = ISA_DEVICE(d);
>      PCTestdev *dev = TESTDEV(d);
>      MemoryRegion *mem = isa_address_space(isa);
> @@ -191,14 +206,73 @@ static void testdev_realizefn(DeviceState *d, Error **errp)
>      memory_region_add_subregion(io,  0xe8,       &dev->ioport_byte);
>      memory_region_add_subregion(io,  0x2000,     &dev->irq);
>      memory_region_add_subregion(mem, 0xff000000, &dev->iomem);
> +
> +    if (!dev->guest_paddr || !dev->write_fifo) {
> +        return;
> +    }
> +
> +    dev->wfd = open(dev->write_fifo, O_WRONLY);
> +    if (dev->wfd < 0) {
> +        error_report("failed to open write fifo %s", dev->write_fifo);
> +        return;
> +    }
> +
> +    if (dev->read_fifo) {
> +        dev->rfd = open(dev->read_fifo, O_RDONLY);
> +        if (dev->rfd < 0) {
> +            error_report("failed to open read fifo %s", dev->read_fifo);
> +            close(dev->wfd);
> +            return;
> +        }
> +    }
> +
> +    flags |= dev->pio ? KVM_IOREGION_PIO : 0;
> +    flags |= dev->posted_writes ? KVM_IOREGION_POSTED_WRITES : 0;
> +    ioreg.guest_paddr = dev->guest_paddr;
> +    ioreg.memory_size = dev->memory_size;
> +    ioreg.write_fd = dev->wfd;
> +    ioreg.read_fd = dev->rfd;
> +    ioreg.flags = flags;
> +    kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg);

Printing an error message would be useful when this fails (e.g. when the
region overlaps with an existing ioeventfd/ioregionfd). 

> +}
> +
> +static void testdev_unrealizefn(DeviceState *d)
> +{
> +    struct kvm_ioregion ioreg;
> +    PCTestdev *dev = TESTDEV(d);
> +
> +    if (!dev->guest_paddr || !dev->write_fifo) {
> +        return;
> +    }
> +
> +    ioreg.guest_paddr = dev->guest_paddr;
> +    ioreg.memory_size = dev->memory_size;
> +    ioreg.flags = KVM_IOREGION_DEASSIGN;
> +    kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg);
> +    close(dev->wfd);
> +    if (dev->rfd > 0) {
> +        close(dev->rfd);
> +    }
>  }
>  
> +static Property ioregionfd_properties[] = {
> +    DEFINE_PROP_UINT64("addr", PCTestdev, guest_paddr, 0),
> +    DEFINE_PROP_UINT64("size", PCTestdev, memory_size, 0),
> +    DEFINE_PROP_STRING("rfifo", PCTestdev, read_fifo),
> +    DEFINE_PROP_STRING("wfifo", PCTestdev, write_fifo),
> +    DEFINE_PROP_BOOL("pio", PCTestdev, pio, false),
> +    DEFINE_PROP_BOOL("pw", PCTestdev, posted_writes, false),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void testdev_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>      dc->realize = testdev_realizefn;
> +    dc->unrealize = testdev_unrealizefn;
> +    device_class_set_props(dc, ioregionfd_properties);
>  }
>  
>  static const TypeInfo testdev_info = {
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 687c598be9..d68728764a 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -234,6 +234,8 @@ int kvm_has_intx_set_mask(void);
>  bool kvm_arm_supports_user_irq(void);
>  
>  
> +int kvm_vm_ioctl(KVMState *s, int type, ...);
> +
>  #ifdef NEED_CPU_H
>  #include "cpu.h"
>  
> @@ -257,8 +259,6 @@ void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared));
>  
>  int kvm_ioctl(KVMState *s, int type, ...);
>  
> -int kvm_vm_ioctl(KVMState *s, int type, ...);
> -
>  int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
>  
>  /**
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 020b62a619..c426fa1e56 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -733,6 +733,29 @@ struct kvm_ioeventfd {
>  	__u8  pad[36];
>  };
>  
> +enum {
> +	kvm_ioregion_flag_nr_pio,
> +	kvm_ioregion_flag_nr_posted_writes,
> +	kvm_ioregion_flag_nr_deassign,
> +	kvm_ioregion_flag_nr_max,
> +};
> +
> +#define KVM_IOREGION_PIO (1 << kvm_ioregion_flag_nr_pio)
> +#define KVM_IOREGION_POSTED_WRITES (1 << kvm_ioregion_flag_nr_posted_writes)
> +#define KVM_IOREGION_DEASSIGN (1 << kvm_ioregion_flag_nr_deassign)
> +
> +#define KVM_IOREGION_VALID_FLAG_MASK ((1 << kvm_ioregion_flag_nr_max) - 1)
> +
> +struct kvm_ioregion {
> +	__u64 guest_paddr; /* guest physical address */
> +	__u64 memory_size; /* bytes */
> +	__u64 user_data;
> +	__s32 read_fd;
> +	__s32 write_fd;
> +	__u32 flags;
> +	__u8  pad[28];
> +};
> +
>  #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
>  #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
>  #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
> @@ -1311,6 +1334,7 @@ struct kvm_vfio_spapr_tce {
>  					struct kvm_userspace_memory_region)
>  #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
>  #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
> +#define KVM_SET_IOREGION          _IOW(KVMIO,  0x49, struct kvm_ioregion)
>  
>  /* enable ucontrol for s390 */
>  struct kvm_s390_ucas_mapping {
> -- 
> 2.25.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

WARNING: multiple messages have this Message-ID (diff)
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Elena Afanasova <eafanasova@gmail.com>
Cc: elena.ufimtseva@oracle.com, jag.raman@oracle.com,
	qemu-devel@nongnu.org, kvm@vger.kernel.org
Subject: Re: [PATCH RFC] hw/misc/pc-testdev: add support for ioregionfd testing
Date: Mon, 1 Mar 2021 17:47:41 +0000	[thread overview]
Message-ID: <YD0ovavIuThCSS5J@stefanha-x1.localdomain> (raw)
In-Reply-To: <20210301131628.5211-1-eafanasova@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 8206 bytes --]

On Mon, Mar 01, 2021 at 04:16:28PM +0300, Elena Afanasova wrote:

Thanks for posting this piece of the ioregionfd testing infrastructure!

Please include a commit description even if it's just an RFC patch.
People on qemu-devel may not be aware of ioregionfd or the purpose of
this patch.

Something like:

  Add ioregionfd support to pc-testdev for use with kvm-unit-tests that
  exercise the new KVM ioregionfd API. ioregionfd is a new MMIO/PIO
  dispatch mechanism for KVM. The kernel patches implementing ioregionfd
  are available here:

    https://patchwork.kernel.org/project/kvm/list/?series=436083

  The kvm-unit-test will create one read FIFO and one write FIFO and
  then invoke QEMU like this:

    --device pc-testdev,addr=0x100000000,size=4096,rfifo=path/to/rfifo,wfifo=path/to/wfifo

  QEMU does not actually process the read FIFO or write FIFO. It simply
  registers an ioregionfd with KVM. From then on KVM will communicate
  directly with the kvm-unit-test via the read FIFO and write FIFO that
  were provided. This allows test cases to handle MMIO/PIO accesses.

Elena and I discussed the long-term QEMU API for ioregionfd on IRC.
Eventually this test device could use it instead of directly calling
kvm_vm_ioctl(). The new QEMU API would be
memory_region_set_aio_context(AioContext *aio_context).

When ioregionfd is available an ioregion would be registered with KVM
and AioContext will have a read fd to handle MMIO/PIO accesses for any
of its ioregions. This way device emulation can run outside the vcpu
thread.

When ioregionfd is not available QEMU can emulate it by writing a struct
ioregionfd_cmd to the write fd from the vcpu thread. The relevant
AioContext will handle the read fd as normal and dispatch the MMIO/PIO
access to the MemoryRegion.

> Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
> ---
>  hw/misc/pc-testdev.c      | 74 +++++++++++++++++++++++++++++++++++++++
>  include/sysemu/kvm.h      |  4 +--
>  linux-headers/linux/kvm.h | 24 +++++++++++++
>  3 files changed, 100 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
> index e389651869..38355923ca 100644
> --- a/hw/misc/pc-testdev.c
> +++ b/hw/misc/pc-testdev.c
> @@ -40,6 +40,9 @@
>  #include "hw/irq.h"
>  #include "hw/isa/isa.h"
>  #include "qom/object.h"
> +#include "sysemu/kvm.h"
> +#include <linux/kvm.h>
> +#include "hw/qdev-properties.h"
>  
>  #define IOMEM_LEN    0x10000
>  
> @@ -53,6 +56,15 @@ struct PCTestdev {
>      MemoryRegion iomem;
>      uint32_t ioport_data;
>      char iomem_buf[IOMEM_LEN];
> +
> +    uint64_t guest_paddr;
> +    uint64_t memory_size;
> +    char *read_fifo;
> +    char *write_fifo;
> +    bool posted_writes;
> +    bool pio;
> +    int rfd;
> +    int wfd;
>  };
>  
>  #define TYPE_TESTDEV "pc-testdev"
> @@ -169,6 +181,9 @@ static const MemoryRegionOps test_iomem_ops = {
>  
>  static void testdev_realizefn(DeviceState *d, Error **errp)
>  {
> +    struct kvm_ioregion ioreg;

Zero this so that user_data is always 0 instead of undefined data from
the stack?

> +    int flags = 0;
> +
>      ISADevice *isa = ISA_DEVICE(d);
>      PCTestdev *dev = TESTDEV(d);
>      MemoryRegion *mem = isa_address_space(isa);
> @@ -191,14 +206,73 @@ static void testdev_realizefn(DeviceState *d, Error **errp)
>      memory_region_add_subregion(io,  0xe8,       &dev->ioport_byte);
>      memory_region_add_subregion(io,  0x2000,     &dev->irq);
>      memory_region_add_subregion(mem, 0xff000000, &dev->iomem);
> +
> +    if (!dev->guest_paddr || !dev->write_fifo) {
> +        return;
> +    }
> +
> +    dev->wfd = open(dev->write_fifo, O_WRONLY);
> +    if (dev->wfd < 0) {
> +        error_report("failed to open write fifo %s", dev->write_fifo);
> +        return;
> +    }
> +
> +    if (dev->read_fifo) {
> +        dev->rfd = open(dev->read_fifo, O_RDONLY);
> +        if (dev->rfd < 0) {
> +            error_report("failed to open read fifo %s", dev->read_fifo);
> +            close(dev->wfd);
> +            return;
> +        }
> +    }
> +
> +    flags |= dev->pio ? KVM_IOREGION_PIO : 0;
> +    flags |= dev->posted_writes ? KVM_IOREGION_POSTED_WRITES : 0;
> +    ioreg.guest_paddr = dev->guest_paddr;
> +    ioreg.memory_size = dev->memory_size;
> +    ioreg.write_fd = dev->wfd;
> +    ioreg.read_fd = dev->rfd;
> +    ioreg.flags = flags;
> +    kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg);

Printing an error message would be useful when this fails (e.g. when the
region overlaps with an existing ioeventfd/ioregionfd). 

> +}
> +
> +static void testdev_unrealizefn(DeviceState *d)
> +{
> +    struct kvm_ioregion ioreg;
> +    PCTestdev *dev = TESTDEV(d);
> +
> +    if (!dev->guest_paddr || !dev->write_fifo) {
> +        return;
> +    }
> +
> +    ioreg.guest_paddr = dev->guest_paddr;
> +    ioreg.memory_size = dev->memory_size;
> +    ioreg.flags = KVM_IOREGION_DEASSIGN;
> +    kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg);
> +    close(dev->wfd);
> +    if (dev->rfd > 0) {
> +        close(dev->rfd);
> +    }
>  }
>  
> +static Property ioregionfd_properties[] = {
> +    DEFINE_PROP_UINT64("addr", PCTestdev, guest_paddr, 0),
> +    DEFINE_PROP_UINT64("size", PCTestdev, memory_size, 0),
> +    DEFINE_PROP_STRING("rfifo", PCTestdev, read_fifo),
> +    DEFINE_PROP_STRING("wfifo", PCTestdev, write_fifo),
> +    DEFINE_PROP_BOOL("pio", PCTestdev, pio, false),
> +    DEFINE_PROP_BOOL("pw", PCTestdev, posted_writes, false),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void testdev_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>      dc->realize = testdev_realizefn;
> +    dc->unrealize = testdev_unrealizefn;
> +    device_class_set_props(dc, ioregionfd_properties);
>  }
>  
>  static const TypeInfo testdev_info = {
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 687c598be9..d68728764a 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -234,6 +234,8 @@ int kvm_has_intx_set_mask(void);
>  bool kvm_arm_supports_user_irq(void);
>  
>  
> +int kvm_vm_ioctl(KVMState *s, int type, ...);
> +
>  #ifdef NEED_CPU_H
>  #include "cpu.h"
>  
> @@ -257,8 +259,6 @@ void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared));
>  
>  int kvm_ioctl(KVMState *s, int type, ...);
>  
> -int kvm_vm_ioctl(KVMState *s, int type, ...);
> -
>  int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
>  
>  /**
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 020b62a619..c426fa1e56 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -733,6 +733,29 @@ struct kvm_ioeventfd {
>  	__u8  pad[36];
>  };
>  
> +enum {
> +	kvm_ioregion_flag_nr_pio,
> +	kvm_ioregion_flag_nr_posted_writes,
> +	kvm_ioregion_flag_nr_deassign,
> +	kvm_ioregion_flag_nr_max,
> +};
> +
> +#define KVM_IOREGION_PIO (1 << kvm_ioregion_flag_nr_pio)
> +#define KVM_IOREGION_POSTED_WRITES (1 << kvm_ioregion_flag_nr_posted_writes)
> +#define KVM_IOREGION_DEASSIGN (1 << kvm_ioregion_flag_nr_deassign)
> +
> +#define KVM_IOREGION_VALID_FLAG_MASK ((1 << kvm_ioregion_flag_nr_max) - 1)
> +
> +struct kvm_ioregion {
> +	__u64 guest_paddr; /* guest physical address */
> +	__u64 memory_size; /* bytes */
> +	__u64 user_data;
> +	__s32 read_fd;
> +	__s32 write_fd;
> +	__u32 flags;
> +	__u8  pad[28];
> +};
> +
>  #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
>  #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
>  #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
> @@ -1311,6 +1334,7 @@ struct kvm_vfio_spapr_tce {
>  					struct kvm_userspace_memory_region)
>  #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
>  #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
> +#define KVM_SET_IOREGION          _IOW(KVMIO,  0x49, struct kvm_ioregion)
>  
>  /* enable ucontrol for s390 */
>  struct kvm_s390_ucas_mapping {
> -- 
> 2.25.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2021-03-01 17:56 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-01 13:16 [PATCH RFC] hw/misc/pc-testdev: add support for ioregionfd testing Elena Afanasova
2021-03-01 13:16 ` Elena Afanasova
2021-03-01 17:47 ` Stefan Hajnoczi [this message]
2021-03-01 17:47   ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YD0ovavIuThCSS5J@stefanha-x1.localdomain \
    --to=stefanha@redhat.com \
    --cc=eafanasova@gmail.com \
    --cc=elena.ufimtseva@oracle.com \
    --cc=jag.raman@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.