All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yan Zhao <yan.y.zhao@intel.com>
To: Kirti Wankhede <kwankhede@nvidia.com>
Cc: "Zhengxiao.zx@Alibaba-inc.com" <Zhengxiao.zx@Alibaba-inc.com>,
	"Tian, Kevin" <kevin.tian@intel.com>,
	"Liu, Yi L" <yi.l.liu@intel.com>,
	"cjia@nvidia.com" <cjia@nvidia.com>,
	"eskultet@redhat.com" <eskultet@redhat.com>,
	"Yang, Ziye" <ziye.yang@intel.com>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
	"cohuck@redhat.com" <cohuck@redhat.com>,
	"shuangtai.tst@alibaba-inc.com" <shuangtai.tst@alibaba-inc.com>,
	"dgilbert@redhat.com" <dgilbert@redhat.com>,
	"Wang, Zhi A" <zhi.a.wang@intel.com>,
	"mlevitsk@redhat.com" <mlevitsk@redhat.com>,
	"pasic@linux.ibm.com" <pasic@linux.ibm.com>,
	"aik@ozlabs.ru" <aik@ozlabs.ru>,
	"alex.williamson@redhat.com" <alex.williamson@redhat.com>,
	"eauger@redhat.com" <eauger@redhat.com>,
	"felipe@nutanix.com" <felipe@nutanix.com>,
	"jonathan.davies@nutanix.com" <jonathan.davies@nutanix.com>,
	"Liu, Changpeng" <changpeng.liu@intel.com>,
	"Ken.Xue@amd.com" <Ken.Xue@amd.com>
Subject: Re: [PATCH v9 QEMU 13/15] vfio: Add vfio_listener_log_sync to mark dirty pages
Date: Tue, 12 Nov 2019 20:46:56 -0500	[thread overview]
Message-ID: <20191113014656.GA18001@joy-OptiPlex-7040> (raw)
In-Reply-To: <1573578324-8389-14-git-send-email-kwankhede@nvidia.com>

On Wed, Nov 13, 2019 at 01:05:22AM +0800, Kirti Wankhede wrote:
> vfio_listener_log_sync gets list of dirty pages from container using
> VFIO_IOMMU_GET_DIRTY_BITMAP ioctl and mark those pages dirty when all
> devices are stopped and saving state.
> Return early for the RAM block section of mapped MMIO region.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/common.c     | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |   1 +
>  2 files changed, 104 insertions(+)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index ade9839c28a3..66f1c64bf074 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -29,6 +29,7 @@
>  #include "hw/vfio/vfio.h"
>  #include "exec/address-spaces.h"
>  #include "exec/memory.h"
> +#include "exec/ram_addr.h"
>  #include "hw/hw.h"
>  #include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
> @@ -38,6 +39,7 @@
>  #include "sysemu/reset.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "migration/migration.h"
>  
>  VFIOGroupList vfio_group_list =
>      QLIST_HEAD_INITIALIZER(vfio_group_list);
> @@ -288,6 +290,28 @@ const MemoryRegionOps vfio_region_ops = {
>  };
>  
>  /*
> + * Device state interfaces
> + */
> +
> +static bool vfio_devices_are_stopped_and_saving(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if ((vbasedev->device_state & VFIO_DEVICE_STATE_SAVING) &&
> +                !(vbasedev->device_state & VFIO_DEVICE_STATE_RUNNING)) {
> +                continue;
> +            } else {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
> +/*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
>  static int vfio_dma_unmap(VFIOContainer *container,
> @@ -813,9 +837,88 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      }
>  }
>  
> +static int vfio_get_dirty_bitmap(VFIOContainer *container,
> +                                 MemoryRegionSection *section)
> +{
> +    struct vfio_iommu_type1_dirty_bitmap range;
> +    uint64_t bitmap_size;
> +    int ret;
> +
> +    range.argsz = sizeof(range);
> +
> +    if (memory_region_is_iommu(section->mr)) {
> +        VFIOGuestIOMMU *giommu;
> +        IOMMUTLBEntry iotlb;
> +
> +        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> +            if (MEMORY_REGION(giommu->iommu) == section->mr &&
> +                giommu->n.start == section->offset_within_region) {
> +                break;
> +            }
> +        }
> +
> +        if (!giommu) {
> +            return -EINVAL;
> +        }
> +
> +        iotlb = address_space_get_iotlb_entry(container->space->as,
> +                       TARGET_PAGE_ALIGN(section->offset_within_address_space),
> +                       true, MEMTXATTRS_UNSPECIFIED);
> +        range.iova = iotlb.iova + giommu->iommu_offset;
> +        range.size = iotlb.addr_mask + 1;
> +    } else {
> +        range.iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +        range.size = int128_get64(section->size);
> +    }
> +
> +    bitmap_size = BITS_TO_LONGS(range.size >> TARGET_PAGE_BITS) *
> +                                                             sizeof(uint64_t);
> +
> +    range.bitmap = g_try_malloc0(bitmap_size);
> +    if (!range.bitmap) {
> +        error_report("%s: Error allocating bitmap buffer of size 0x%lx",
> +                     __func__, bitmap_size);
> +        return -ENOMEM;
> +    }
> +
> +    range.bitmap_size = bitmap_size;
> +
> +    ret = ioctl(container->fd, VFIO_IOMMU_GET_DIRTY_BITMAP, &range);
> +
From the implementation of ioctl VFIO_IOMMU_GET_DIRTY_BITMAP,
this range.bitmap is indexed by iova, right?
so if viommu is on, why cpu_physical_memory_set_dirty_lebitmap can be 
called directly here without any viommu translation?

> +    if (!ret) {
> +        cpu_physical_memory_set_dirty_lebitmap((uint64_t *)range.bitmap,
> +                       TARGET_PAGE_ALIGN(section->offset_within_address_space),
> +                       bitmap_size >> TARGET_PAGE_BITS);
> +    } else {
> +        error_report("VFIO_IOMMU_GET_DIRTY_BITMAP: %d %d", ret, errno);
> +    }
> +
> +    trace_vfio_get_dirty_bitmap(container->fd, range.iova, range.size,
> +                                bitmap_size);
> +
> +    g_free(range.bitmap);
> +    return ret;
> +}
> +
> +static void vfio_listerner_log_sync(MemoryListener *listener,
> +        MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> +
> +    if (memory_region_is_ram_device(section->mr)) {
> +        return;
> +    }
> +
how about for those devices who need to sync dirty bitmap in RUNNING and
SAVING state?
> +    if (vfio_devices_are_stopped_and_saving()) {
> +
> +        vfio_get_dirty_bitmap(container, section);
> +    }
> +}
> +
when viommu is on, the address space registered for this MemoryListener
is from VTDAddressSpace,
in this address space, listener->log_sync(listener, &mrs) would not be
called for lacking of dirty_log_mask.

If listener->log_sync still needs to be called, some special handlings
are required.

>  static const MemoryListener vfio_memory_listener = {
>      .region_add = vfio_listener_region_add,
>      .region_del = vfio_listener_region_del,
> +    .log_sync = vfio_listerner_log_sync,
>  };
>  
Thanks
Yan

>  static void vfio_listener_release(VFIOContainer *container)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index ac065b559f4e..0dd1f2ffe648 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -160,3 +160,4 @@ vfio_save_complete_precopy(char *name) " (%s)"
>  vfio_load_device_config_state(char *name) " (%s)"
>  vfio_load_state(char *name, uint64_t data) " (%s) data 0x%"PRIx64
>  vfio_load_state_device_data(char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
> +vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64
> -- 
> 2.7.0
> 


  reply	other threads:[~2019-11-13  1:56 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-12 17:05 [PATCH v9 Qemu 00/15] Add migration support for VFIO devices Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 01/15] vfio: KABI for migration interface for device state Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 02/15] vfio iommu: Add ioctl defination to get dirty pages bitmap Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 03/15] vfio iommu: Add ioctl defination to unmap IOVA and return dirty bitmap Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 04/15] vfio: Add function to unmap VFIO region Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 05/15] vfio: Add vfio_get_object callback to VFIODeviceOps Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 06/15] vfio: Add save and load functions for VFIO PCI devices Kirti Wankhede
2019-11-14  5:00   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 07/15] vfio: Add migration region initialization and finalize function Kirti Wankhede
2019-11-14  5:01   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 08/15] vfio: Add VM state change handler to know state of VM Kirti Wankhede
2019-11-14  5:02   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 09/15] vfio: Add migration state change notifier Kirti Wankhede
2019-11-12 17:05 ` [PATCH v9 QEMU 10/15] vfio: Register SaveVMHandlers for VFIO device Kirti Wankhede
2019-11-14  5:02   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 11/15] vfio: Add save state functions to SaveVMHandlers Kirti Wankhede
2019-11-14  5:04   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 12/15] vfio: Add load " Kirti Wankhede
2019-11-14  5:05   ` Alex Williamson
2019-11-20 18:32   ` Dr. David Alan Gilbert
2019-11-12 17:05 ` [PATCH v9 QEMU 13/15] vfio: Add vfio_listener_log_sync to mark dirty pages Kirti Wankhede
2019-11-13  1:46   ` Yan Zhao [this message]
2019-11-14  5:06   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 14/15] vfio: Add ioctl to get dirty pages bitmap during dma unmap Kirti Wankhede
2019-11-13  4:24   ` Yan Zhao
2019-11-14  5:06   ` Alex Williamson
2019-11-12 17:05 ` [PATCH v9 QEMU 15/15] vfio: Make vfio-pci device migration capable Kirti Wankhede
2019-11-14  5:06   ` Alex Williamson
2019-11-13 10:54 ` [PATCH v9 Qemu 00/15] Add migration support for VFIO devices Cornelia Huck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191113014656.GA18001@joy-OptiPlex-7040 \
    --to=yan.y.zhao@intel.com \
    --cc=Ken.Xue@amd.com \
    --cc=Zhengxiao.zx@Alibaba-inc.com \
    --cc=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=changpeng.liu@intel.com \
    --cc=cjia@nvidia.com \
    --cc=cohuck@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=eauger@redhat.com \
    --cc=eskultet@redhat.com \
    --cc=felipe@nutanix.com \
    --cc=jonathan.davies@nutanix.com \
    --cc=kevin.tian@intel.com \
    --cc=kwankhede@nvidia.com \
    --cc=mlevitsk@redhat.com \
    --cc=pasic@linux.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=shuangtai.tst@alibaba-inc.com \
    --cc=yi.l.liu@intel.com \
    --cc=zhi.a.wang@intel.com \
    --cc=ziye.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.