From: Kirti Wankhede <kwankhede@nvidia.com>
To: <alex.williamson@redhat.com>, <cjia@nvidia.com>
Cc: <kevin.tian@intel.com>, <ziye.yang@intel.com>,
	<changpeng.liu@intel.com>, <yi.l.liu@intel.com>,
	<mlevitsk@redhat.com>, <eskultet@redhat.com>, <cohuck@redhat.com>,
	<dgilbert@redhat.com>, <jonathan.davies@nutanix.com>,
	<eauger@redhat.com>, <aik@ozlabs.ru>, <pasic@linux.ibm.com>,
	<felipe@nutanix.com>, <Zhengxiao.zx@Alibaba-inc.com>,
	<shuangtai.tst@alibaba-inc.com>, <Ken.Xue@amd.com>,
	<zhi.a.wang@intel.com>, <yan.y.zhao@intel.com>,
	<qemu-devel@nongnu.org>, <kvm@vger.kernel.org>,
	"Kirti Wankhede" <kwankhede@nvidia.com>
Subject: [PATCH v11 Kernel 6/6] vfio: Selective dirty page tracking if IOMMU backed device pins pages
Date: Tue, 17 Dec 2019 22:40:51 +0530	[thread overview]
Message-ID: <1576602651-15430-7-git-send-email-kwankhede@nvidia.com> (raw)
In-Reply-To: <1576602651-15430-1-git-send-email-kwankhede@nvidia.com>

Track the dirty pages reporting capability for each vfio_device by setting
the capability flag when vfio_pin_pages() is called for that device.

In the vfio_iommu_type1 module, while creating the dirty pages bitmap, check
whether an IOMMU backed device is present in the container. If an IOMMU
backed device is present, check the dirty pages reporting capability of each
vfio device in the container. If all vfio devices are capable of reporting
dirty pages by pinning them through the external API, then create the bitmap
from pinned pages only. If an IOMMU backed device is present in the container
and any one device is not able to report dirty pages, then mark all pages as
dirty.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 drivers/vfio/vfio.c             | 33 +++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++++++++--
 include/linux/vfio.h            |  3 ++-
 3 files changed, 77 insertions(+), 3 deletions(-)
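
Note (not part of the patch): below is a minimal sketch, assuming a
hypothetical mdev vendor driver, of how the capability added here gets set.
Any successful call to the existing vfio_pin_pages() external API marks the
backing vfio_device as dirty-pages-reporting capable, which later lets
vfio_iommu_type1 restrict the dirty bitmap to pinned pages. The helper name
my_mdev_pin_guest_range() and its parameters are illustrative only.

#include <linux/iommu.h>
#include <linux/vfio.h>

/*
 * Hypothetical vendor driver helper, shown only to illustrate how a
 * device becomes dirty-pages-reporting capable in this series.
 */
static int my_mdev_pin_guest_range(struct device *dev,
				   unsigned long *gfns, int npage,
				   unsigned long *hpfns)
{
	int pinned;

	/*
	 * Pinning through the external API is what sets
	 * device->dirty_pages_cap in vfio_pin_pages().
	 */
	pinned = vfio_pin_pages(dev, gfns, npage,
				IOMMU_READ | IOMMU_WRITE, hpfns);
	if (pinned < 0)
		return pinned;

	/*
	 * ... program the device to DMA into the pinned pages; only
	 * these pages are reported dirty if every device in the
	 * container pins through this API ...
	 */

	return pinned;
}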

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index c8482624ca34..9d2fbe09768a 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -96,6 +96,8 @@ struct vfio_device {
 	struct vfio_group		*group;
 	struct list_head		group_next;
 	void				*device_data;
+	/* dirty pages reporting capable */
+	bool				dirty_pages_cap;
 };
 
 #ifdef CONFIG_VFIO_NOIOMMU
@@ -1866,6 +1868,29 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 }
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 
+int vfio_device_is_dirty_reporting_capable(struct device *dev, bool *cap)
+{
+	struct vfio_device *device;
+	struct vfio_group *group;
+
+	if (!dev || !cap)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (!group)
+		return -ENODEV;
+
+	device = vfio_group_get_device(group, dev);
+	if (!device)
+		return -ENODEV;
+
+	*cap = device->dirty_pages_cap;
+	vfio_device_put(device);
+	vfio_group_put(group);
+	return 0;
+}
+EXPORT_SYMBOL(vfio_device_is_dirty_reporting_capable);
+
 /*
  * Pin a set of guest PFNs and return their associated host PFNs for local
  * domain only.
@@ -1907,6 +1932,14 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 	else
 		ret = -ENOTTY;
 
+	if (ret > 0) {
+		struct vfio_device *device = vfio_group_get_device(group, dev);
+
+		if (device) {
+			device->dirty_pages_cap = true;
+			vfio_device_put(device);
+		}
+	}
 	vfio_group_try_dissolve_container(group);
 
 err_pin_pages:
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 68d8ed3b2665..ef56f31f4e73 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -891,6 +891,39 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
 	return bitmap;
 }
 
+static int vfio_is_dirty_pages_reporting_capable(struct device *dev, void *data)
+{
+	bool new;
+	int ret;
+
+	ret = vfio_device_is_dirty_reporting_capable(dev, &new);
+	if (ret)
+		return ret;
+
+	*(bool *)data = *(bool *)data && new;
+
+	return 0;
+}
+
+static bool vfio_dirty_pages_reporting_capable(struct vfio_iommu *iommu)
+{
+	struct vfio_domain *d;
+	struct vfio_group *g;
+	bool capable = true;
+	int ret;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		list_for_each_entry(g, &d->group_list, next) {
+			ret = iommu_group_for_each_dev(g->iommu_group, &capable,
+					vfio_is_dirty_pages_reporting_capable);
+			if (ret)
+				return false;
+		}
+	}
+
+	return capable;
+}
+
 /*
  * start_iova is the reference from where bitmaping started. This is called
  * from DMA_UNMAP where start_iova can be different than iova
@@ -903,10 +936,17 @@ static void vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova,
 	struct vfio_dma *dma;
 	dma_addr_t i = iova;
 	unsigned long pgshift = __ffs(pgsize);
+	bool dirty_report_cap = true;
+
+	if (IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
+		dirty_report_cap = vfio_dirty_pages_reporting_capable(iommu);
 
 	while ((dma = vfio_find_dma(iommu, i, pgsize))) {
-		/* mark all pages dirty if all pages are pinned and mapped. */
-		if (dma->iommu_mapped) {
+		/*
+		 * mark all pages dirty if any IOMMU capable device is not able
+		 * to report dirty pages and all pages are pinned and mapped.
+		 */
+		if (!dirty_report_cap && dma->iommu_mapped) {
 			dma_addr_t iova_limit;
 
 			iova_limit = (dma->iova + dma->size) < (iova + size) ?
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e42a711a2800..ed3832ea10a1 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -148,7 +148,8 @@ extern int vfio_info_add_capability(struct vfio_info_cap *caps,
 extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
 					      int num_irqs, int max_irq_type,
 					      size_t *data_size);
-
+extern int vfio_device_is_dirty_reporting_capable(struct device *dev,
+						  bool *cap);
 struct pci_dev;
 #if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
-- 
2.7.0


Thread overview: 40+ messages

2019-12-17 17:10 [PATCH v11 Kernel 0/6] KABIs to support migration for VFIO devices Kirti Wankhede
2019-12-17 17:10 ` [PATCH v11 Kernel 1/6] vfio: KABI for migration interface for device state Kirti Wankhede
2019-12-17 17:10 ` [PATCH v11 Kernel 2/6] vfio iommu: Add ioctl definition for dirty pages tracking Kirti Wankhede
2019-12-17 17:10 ` [PATCH v11 Kernel 3/6] vfio iommu: Implementation of ioctl to " Kirti Wankhede
2019-12-17 22:12   ` Alex Williamson
2020-01-07 20:07     ` Kirti Wankhede
2020-01-07 22:02       ` Alex Williamson
2020-01-08 20:01         ` Kirti Wankhede
2020-01-08 22:29           ` Alex Williamson
2020-01-09 13:29             ` Kirti Wankhede
2020-01-09 14:53               ` Alex Williamson
2019-12-17 17:10 ` [PATCH v11 Kernel 4/6] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap Kirti Wankhede
2019-12-17 22:55   ` Alex Williamson
2019-12-17 17:10 ` [PATCH v11 Kernel 5/6] vfio iommu: Adds flag to indicate dirty pages tracking capability support Kirti Wankhede
2019-12-17 17:10 ` [PATCH v11 Kernel 6/6] vfio: Selective dirty page tracking if IOMMU backed device pins pages Kirti Wankhede [this message]
2019-12-18  0:12   ` Alex Williamson
2020-01-07 20:45     ` Kirti Wankhede
2020-01-08  0:09       ` Alex Williamson
2020-01-08 20:52         ` Kirti Wankhede
2020-01-08 22:59           ` Alex Williamson
