linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
@ 2016-04-27 12:22 Yongji Xie
  2016-05-03  2:59 ` Tian, Kevin
  0 siblings, 1 reply; 5+ messages in thread
From: Yongji Xie @ 2016-04-27 12:22 UTC (permalink / raw)
  To: linux-kernel, linux-pci, linuxppc-dev, kvm, linux-doc
  Cc: alex.williamson, bhelgaas, aik, benh, paulus, mpe, corbet,
	warrier, zhong, nikunj, gwshan, Yongji Xie

Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs. This will cause some
performance issues when we passthrough a PCI device with
this kind of BARs. Guest will not be able to handle the mmio
accesses to the BARs, which leads to mmio emulations in host.

However, not all sub-page BARs will share page with other BARs.
We should allow to mmap those sub-page MMIO BARs which we can
make sure will not share page with other BARs.

This patch adds support for this case. And we also try to use
shadow resource to reserve the remainder of the page which hot-add
device's BAR might be assigned into.

Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
---
 drivers/vfio/pci/vfio_pci.c         |   58 ++++++++++++++++++++++++++++++-----
 drivers/vfio/pci/vfio_pci_private.h |    8 +++++
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 98059df..dc1779c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -110,13 +110,47 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
 	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
 }
 
+static bool vfio_pci_bar_mmap_supported(struct vfio_pci_device *vdev, int index)
+{
+	struct resource *res = vdev->pdev->resource + index;
+	struct vfio_pci_shadow_resource *shadow_res;
+
+	if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && res->flags & IORESOURCE_MEM &&
+		resource_size(res) > 0) {
+		if (resource_size(res) >= PAGE_SIZE)
+			return true;
+
+		if (!(res->start & ~PAGE_MASK)) {
+			/*
+			 * Add shadow resource for sub-page bar whose mmio
+			 * page is exclusive in case that hot-add device's
+			 * bar is assigned into the mem hole.
+			 */
+			shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
+			shadow_res->resource.start = res->end + 1;
+			shadow_res->resource.end = res->start + PAGE_SIZE - 1;
+			shadow_res->resource.flags = res->flags;
+			if (request_resource(res->parent,
+					&shadow_res->resource)) {
+				kfree(shadow_res);
+				return false;
+			}
+			shadow_res->index = index;
+			list_add(&shadow_res->res_next,
+					&vdev->shadow_resources_list);
+			return true;
+		}
+	}
+	return false;
+}
+
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
 
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	int ret;
+	int ret, bar;
 	u16 cmd;
 	u8 msix_pos;
 
@@ -183,12 +217,17 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
+	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
+		vdev->bar_mmap_supported[bar] =
+				vfio_pci_bar_mmap_supported(vdev, bar);
+	}
 	return 0;
 }
 
 static void vfio_pci_disable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_shadow_resource *shadow_res, *tmp;
 	int i, bar;
 
 	/* Stop the device from further DMA */
@@ -217,6 +256,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 		vdev->barmap[bar] = NULL;
 	}
 
+	list_for_each_entry_safe(shadow_res, tmp,
+				 &vdev->shadow_resources_list, res_next) {
+		list_del(&shadow_res->res_next);
+		release_resource(&shadow_res->resource);
+		kfree(shadow_res);
+	}
+
 	vdev->needs_reset = true;
 
 	/*
@@ -587,9 +633,7 @@ static long vfio_pci_ioctl(void *device_data,
 
 			info.flags = VFIO_REGION_INFO_FLAG_READ |
 				     VFIO_REGION_INFO_FLAG_WRITE;
-			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
-			    pci_resource_flags(pdev, info.index) &
-			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+			if (vdev->bar_mmap_supported[info.index]) {
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
 				if (info.index == vdev->msix_bar) {
 					ret = msix_sparse_mmap_cap(vdev, &caps);
@@ -1011,16 +1055,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 		return -EINVAL;
 	if (index >= VFIO_PCI_ROM_REGION_INDEX)
 		return -EINVAL;
-	if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
+	if (!vdev->bar_mmap_supported[index])
 		return -EINVAL;
 
-	phys_len = pci_resource_len(pdev, index);
+	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
 	req_len = vma->vm_end - vma->vm_start;
 	pgoff = vma->vm_pgoff &
 		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
 	req_start = pgoff << PAGE_SHIFT;
 
-	if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
+	if (req_start + req_len > phys_len)
 		return -EINVAL;
 
 	if (index == vdev->msix_bar) {
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 8a7d546..0ea4c62 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -57,9 +57,16 @@ struct vfio_pci_region {
 	u32				flags;
 };
 
+struct vfio_pci_shadow_resource {
+	struct resource		resource;
+	int			index;
+	struct list_head	res_next;
+};
+
 struct vfio_pci_device {
 	struct pci_dev		*pdev;
 	void __iomem		*barmap[PCI_STD_RESOURCE_END + 1];
+	bool			bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
 	u8			*pci_config_map;
 	u8			*vconfig;
 	struct perm_bits	*msi_perm;
@@ -87,6 +94,7 @@ struct vfio_pci_device {
 	int			refcnt;
 	struct eventfd_ctx	*err_trigger;
 	struct eventfd_ctx	*req_trigger;
+	struct list_head	shadow_resources_list;
 };
 
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  2016-04-27 12:22 [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive Yongji Xie
@ 2016-05-03  2:59 ` Tian, Kevin
  2016-05-03  5:52   ` Yongji Xie
  0 siblings, 1 reply; 5+ messages in thread
From: Tian, Kevin @ 2016-05-03  2:59 UTC (permalink / raw)
  To: Yongji Xie, linux-kernel, linux-pci, linuxppc-dev, kvm, linux-doc
  Cc: alex.williamson, bhelgaas, aik, benh, paulus, mpe, corbet,
	warrier, zhong, nikunj, gwshan

> From: Yongji Xie
> Sent: Wednesday, April 27, 2016 8:22 PM
> 
> Current vfio-pci implementation disallows to mmap
> sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
> page may be shared with other BARs. This will cause some
> performance issues when we passthrough a PCI device with
> this kind of BARs. Guest will be not able to handle the mmio
> accesses to the BARs which leads to mmio emulations in host.
> 
> However, not all sub-page BARs will share page with other BARs.
> We should allow to mmap those sub-page MMIO BARs which we can
> make sure will not share page with other BARs.
> 
> This patch adds support for this case. And we also try to use
> shadow resource to reserve the remainder of the page which hot-add
> device's BAR might be assigned into.

'shadow' usually means you have a corresponding part being
shadowed, while here looks you mostly want some 'dummy'
resource for reservation purpose?

> +
> +		if (!(res->start & ~PAGE_MASK)) {
> +			/*
> +			 * Add shadow resource for sub-page bar whose mmio
> +			 * page is exclusive in case that hot-add device's
> +			 * bar is assigned into the mem hole.
> +			 */
> +			shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
> +			shadow_res->resource.start = res->end + 1;
> +			shadow_res->resource.end = res->start + PAGE_SIZE - 1;

What about res->start not page aligned so you end up still having 
a portion before res->start not exclusively reserved?

> +			shadow_res->resource.flags = res->flags;
> +			if (request_resource(res->parent,
> +					&shadow_res->resource)) {
> +				kfree(shadow_res);
> +				return false;
> +			}
> +			shadow_res->index = index;
> +			list_add(&shadow_res->res_next,
> +					&vdev->shadow_resources_list);
> +			return true;

Thanks
Kevin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  2016-05-03  2:59 ` Tian, Kevin
@ 2016-05-03  5:52   ` Yongji Xie
  2016-05-03  6:11     ` Tian, Kevin
  0 siblings, 1 reply; 5+ messages in thread
From: Yongji Xie @ 2016-05-03  5:52 UTC (permalink / raw)
  To: Tian, Kevin, linux-kernel, linux-pci, linuxppc-dev, kvm, linux-doc
  Cc: alex.williamson, bhelgaas, aik, benh, paulus, mpe, corbet,
	warrier, zhong, nikunj, gwshan

On 2016/5/3 10:59, Tian, Kevin wrote:

>> From: Yongji Xie
>> Sent: Wednesday, April 27, 2016 8:22 PM
>>
>> Current vfio-pci implementation disallows to mmap
>> sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
>> page may be shared with other BARs. This will cause some
>> performance issues when we passthrough a PCI device with
>> this kind of BARs. Guest will be not able to handle the mmio
>> accesses to the BARs which leads to mmio emulations in host.
>>
>> However, not all sub-page BARs will share page with other BARs.
>> We should allow to mmap those sub-page MMIO BARs which we can
>> make sure will not share page with other BARs.
>>
>> This patch adds support for this case. And we also try to use
>> shadow resource to reserve the remainder of the page which hot-add
>> device's BAR might be assigned into.
> 'shadow' usually means you have a corresponding part being
> shadowed, while here looks you mostly want some 'dummy'
> resource for reservation purpose?

Yes, 'dummy' may be better here. And I would also replace
shadow_res/shadow_resources_list with reserved_res/reserved_resources_list.

>> +
>> +		if (!(res->start & ~PAGE_MASK)) {
>> +			/*
>> +			 * Add shadow resource for sub-page bar whose mmio
>> +			 * page is exclusive in case that hot-add device's
>> +			 * bar is assigned into the mem hole.
>> +			 */
>> +			shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
>> +			shadow_res->resource.start = res->end + 1;
>> +			shadow_res->resource.end = res->start + PAGE_SIZE - 1;
> What about res->start not page aligned so you end up still having
> a portion before res->start not exclusively reserved?

Do you mean add a 'dummy' resource to reserve the portion
before res->start if res->start not page aligned?

But would it happen that there is a mem hole in the portion
before res->start? The resource should have been assigned
into the hole at the beginning.

Thanks,
Yongji

>> +			shadow_res->resource.flags = res->flags;
>> +			if (request_resource(res->parent,
>> +					&shadow_res->resource)) {
>> +				kfree(shadow_res);
>> +				return false;
>> +			}
>> +			shadow_res->index = index;
>> +			list_add(&shadow_res->res_next,
>> +					&vdev->shadow_resources_list);
>> +			return true;
> Thanks
> Kevin
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  2016-05-03  5:52   ` Yongji Xie
@ 2016-05-03  6:11     ` Tian, Kevin
  2016-05-03  7:36       ` Yongji Xie
  0 siblings, 1 reply; 5+ messages in thread
From: Tian, Kevin @ 2016-05-03  6:11 UTC (permalink / raw)
  To: Yongji Xie, linux-kernel, linux-pci, linuxppc-dev, kvm, linux-doc
  Cc: alex.williamson, bhelgaas, aik, benh, paulus, mpe, corbet,
	warrier, zhong, nikunj, gwshan

> From: Yongji Xie [mailto:xyjxie@linux.vnet.ibm.com]
> Sent: Tuesday, May 03, 2016 1:52 PM
> 
> >> +
> >> +		if (!(res->start & ~PAGE_MASK)) {
> >> +			/*
> >> +			 * Add shadow resource for sub-page bar whose mmio
> >> +			 * page is exclusive in case that hot-add device's
> >> +			 * bar is assigned into the mem hole.
> >> +			 */
> >> +			shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
> >> +			shadow_res->resource.start = res->end + 1;
> >> +			shadow_res->resource.end = res->start + PAGE_SIZE - 1;
> > What about res->start not page aligned so you end up still having
> > a portion before res->start not exclusively reserved?
> 
> Do you mean add a 'dummy' resource to reserve the portion
> before res->start if res->start not page aligned?
> 
> But would it happen that there is a mem hole in the portion
> before res->start? The resource should have been assigned
> into the hole at the beginning.
> 

Just a quick thought. Another device might occupy that range 
before initializing this device, and then 'another device' is hot
removed later... 

Thanks
Kevin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  2016-05-03  6:11     ` Tian, Kevin
@ 2016-05-03  7:36       ` Yongji Xie
  0 siblings, 0 replies; 5+ messages in thread
From: Yongji Xie @ 2016-05-03  7:36 UTC (permalink / raw)
  To: Tian, Kevin, linux-kernel, linux-pci, linuxppc-dev, kvm, linux-doc
  Cc: alex.williamson, bhelgaas, aik, benh, paulus, mpe, corbet,
	warrier, zhong, nikunj, gwshan

On 2016/5/3 14:11, Tian, Kevin wrote:

>> From: Yongji Xie [mailto:xyjxie@linux.vnet.ibm.com]
>> Sent: Tuesday, May 03, 2016 1:52 PM
>>
>>>> +
>>>> +		if (!(res->start & ~PAGE_MASK)) {
>>>> +			/*
>>>> +			 * Add shadow resource for sub-page bar whose mmio
>>>> +			 * page is exclusive in case that hot-add device's
>>>> +			 * bar is assigned into the mem hole.
>>>> +			 */
>>>> +			shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
>>>> +			shadow_res->resource.start = res->end + 1;
>>>> +			shadow_res->resource.end = res->start + PAGE_SIZE - 1;
>>> What about res->start not page aligned so you end up still having
>>> a portion before res->start not exclusively reserved?
>> Do you mean add a 'dummy' resource to reserve the portion
>> before res->start if res->start not page aligned?
>>
>> But would it happen that there is a mem hole in the portion
>> before res->start? The resource should have been assigned
>> into the hole at the beginning.
>>
> Just a quick thought. Another device might occupy that range
> before initializing this device, and then 'another device' is hot
> removed later...
>
> Thanks
> Kevin

That's a good point! I will add support for this case in v2.

Thanks,
Yongji

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-05-03  7:36 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-27 12:22 [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive Yongji Xie
2016-05-03  2:59 ` Tian, Kevin
2016-05-03  5:52   ` Yongji Xie
2016-05-03  6:11     ` Tian, Kevin
2016-05-03  7:36       ` Yongji Xie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).