linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: alex.williamson@redhat.com
Cc: Jason Gunthorpe <jgg@nvidia.com>,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	jgg@nvidia.com, peterx@redhat.com
Subject: [PATCH 7/7] vfio/pci: Remove map-on-fault behavior
Date: Thu, 05 Aug 2021 11:08:21 -0600	[thread overview]
Message-ID: <162818330190.1511194.10498114924408843888.stgit@omen> (raw)
In-Reply-To: <162818167535.1511194.6614962507750594786.stgit@omen>

With vfio_device_io_remap_mapping_range() we can repopulate vmas with
device mappings around manipulation of the device rather than waiting
for an access.  This allows us to go back to the more standard use
case of io_remap_pfn_range() for device memory while still preventing
access to device memory through mmaps when the device is disabled.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c         |   80 +++++++++++++++++------------------
 drivers/vfio/pci/vfio_pci_config.c  |    8 ++--
 drivers/vfio/pci/vfio_pci_private.h |    3 +
 3 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 7a9f67cfc0a2..196b8002447b 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -447,6 +447,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 		kfree(dummy_res);
 	}
 
+	vdev->zapped_bars = false;
 	vdev->needs_reset = true;
 
 	/*
@@ -1057,7 +1058,7 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 
 		vfio_pci_zap_and_down_write_memory_lock(vdev);
 		ret = pci_try_reset_function(vdev->pdev);
-		up_write(&vdev->memory_lock);
+		vfio_pci_test_and_up_write_memory_lock(vdev);
 
 		return ret;
 
@@ -1256,7 +1257,7 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 		for (i = 0; i < devs.cur_index; i++) {
 			struct vfio_pci_device *tmp = devs.devices[i];
 
-			up_write(&tmp->memory_lock);
+			vfio_pci_test_and_up_write_memory_lock(tmp);
 			vfio_device_put(&tmp->vdev);
 		}
 		kfree(devs.devices);
@@ -1413,6 +1414,14 @@ static void vfio_pci_zap_bars(struct vfio_pci_device *vdev)
 			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX),
 			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX) -
 			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX));
+
+	/*
+	 * Modified under memory_lock write semaphore.  Device handoff
+	 * with memory enabled, therefore any disable will zap and setup
+	 * a remap when re-enabled.  io_remap_pfn_range() is not forgiving
+	 * of duplicate mappings so we must track.
+	 */
+	vdev->zapped_bars = true;
 }
 
 void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
@@ -1421,6 +1430,18 @@ void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
 	vfio_pci_zap_bars(vdev);
 }
 
+void vfio_pci_test_and_up_write_memory_lock(struct vfio_pci_device *vdev)
+{
+	if (vdev->zapped_bars && __vfio_pci_memory_enabled(vdev)) {
+		WARN_ON(vfio_device_io_remap_mapping_range(&vdev->vdev,
+			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX),
+			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX) -
+			VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX)));
+		vdev->zapped_bars = false;
+	}
+	up_write(&vdev->memory_lock);
+}
+
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
 {
 	u16 cmd;
@@ -1464,39 +1485,6 @@ static int vfio_pci_bar_vma_to_pfn(struct vfio_device *core_vdev,
 	return 0;
 }
 
-static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_device *vdev = vma->vm_private_data;
-	unsigned long vaddr, pfn;
-	vm_fault_t ret = VM_FAULT_SIGBUS;
-
-	if (vfio_pci_bar_vma_to_pfn(&vdev->vdev, vma, &pfn))
-		return ret;
-
-	down_read(&vdev->memory_lock);
-
-	if (__vfio_pci_memory_enabled(vdev)) {
-		for (vaddr = vma->vm_start;
-		     vaddr < vma->vm_end; vaddr += PAGE_SIZE, pfn++) {
-			ret = vmf_insert_pfn(vma, vaddr, pfn);
-			if (ret != VM_FAULT_NOPAGE) {
-				zap_vma_ptes(vma, vma->vm_start,
-					     vaddr - vma->vm_start);
-				break;
-			}
-		}
-	}
-
-	up_read(&vdev->memory_lock);
-
-	return ret;
-}
-
-static const struct vm_operations_struct vfio_pci_mmap_ops = {
-	.fault = vfio_pci_mmap_fault,
-};
-
 static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
 {
 	struct vfio_pci_device *vdev =
@@ -1504,6 +1492,7 @@ static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *v
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned int index;
 	u64 phys_len, req_len, pgoff, req_start;
+	unsigned long pfn;
 	int ret;
 
 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
@@ -1554,18 +1543,25 @@ static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *v
 		}
 	}
 
-	vma->vm_private_data = vdev;
+	ret = vfio_pci_bar_vma_to_pfn(core_vdev, vma, &pfn);
+	if (ret)
+		return ret;
+
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 
+	down_read(&vdev->memory_lock);
 	/*
-	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
-	 * change vm_flags within the fault handler.  Set them now.
+	 * Only perform the mapping now if BAR is not in zapped state, VFs
+	 * always report memory enabled so relying on device enable state
+	 * could lead to duplicate remaps.
 	 */
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_ops = &vfio_pci_mmap_ops;
+	if (!vdev->zapped_bars)
+		ret = io_remap_pfn_range(vma, vma->vm_start, pfn,
+					 vma->vm_end - vma->vm_start,
+					 vma->vm_page_prot);
+	up_read(&vdev->memory_lock);
 
-	return 0;
+	return ret;
 }
 
 static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 70e28efbc51f..4220057b253c 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -605,7 +605,7 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
 	if (count < 0) {
 		if (offset == PCI_COMMAND)
-			up_write(&vdev->memory_lock);
+			vfio_pci_test_and_up_write_memory_lock(vdev);
 		return count;
 	}
 
@@ -619,7 +619,7 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 		*virt_cmd &= cpu_to_le16(~mask);
 		*virt_cmd |= cpu_to_le16(new_cmd & mask);
 
-		up_write(&vdev->memory_lock);
+		vfio_pci_test_and_up_write_memory_lock(vdev);
 	}
 
 	/* Emulate INTx disable */
@@ -860,7 +860,7 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
 		if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
 			vfio_pci_zap_and_down_write_memory_lock(vdev);
 			pci_try_reset_function(vdev->pdev);
-			up_write(&vdev->memory_lock);
+			vfio_pci_test_and_up_write_memory_lock(vdev);
 		}
 	}
 
@@ -942,7 +942,7 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
 		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
 			vfio_pci_zap_and_down_write_memory_lock(vdev);
 			pci_try_reset_function(vdev->pdev);
-			up_write(&vdev->memory_lock);
+			vfio_pci_test_and_up_write_memory_lock(vdev);
 		}
 	}
 
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0aa542fa1e26..9aedb78a4ae3 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -128,6 +128,7 @@ struct vfio_pci_device {
 	bool			needs_reset;
 	bool			nointx;
 	bool			needs_pm_restore;
+	bool			zapped_bars;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
 	struct vfio_pci_reflck	*reflck;
@@ -186,6 +187,8 @@ extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
 extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
 extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
 						    *vdev);
+extern void vfio_pci_test_and_up_write_memory_lock(struct vfio_pci_device
+						   *vdev);
 extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
 extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
 					       u16 cmd);



  parent reply	other threads:[~2021-08-05 17:08 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-05 17:06 [PATCH 0/7] vfio: device fd address space and vfio-pci mmap invalidation cleanup Alex Williamson
2021-08-05 17:07 ` [PATCH 1/7] vfio: Create vfio_fs_type with inode per device Alex Williamson
2021-08-10  8:43   ` Christoph Hellwig
2021-08-10 14:52     ` Alex Williamson
2021-08-10 14:57       ` Christoph Hellwig
2021-08-10 18:49         ` Peter Xu
2021-08-10 21:16           ` Alex Williamson
2021-08-10 22:18             ` Peter Xu
2021-08-05 17:07 ` [PATCH 2/7] vfio: Export unmap_mapping_range() wrapper Alex Williamson
2021-08-10  8:45   ` Christoph Hellwig
2021-08-10 18:56   ` Peter Xu
2021-08-05 17:07 ` [PATCH 3/7] vfio/pci: Use vfio_device_unmap_mapping_range() Alex Williamson
2021-08-06  1:04   ` Jason Gunthorpe
2021-08-06 20:17     ` Alex Williamson
2021-08-10  8:51       ` Christoph Hellwig
2021-08-10 11:57         ` Jason Gunthorpe
2021-08-10 12:55           ` Christoph Hellwig
2021-08-10 21:50             ` Alex Williamson
2021-08-11 11:57               ` Jason Gunthorpe
2021-08-10  8:53   ` Christoph Hellwig
2021-08-10 19:02     ` Peter Xu
2021-08-10 20:51       ` Alex Williamson
2021-08-10 18:48   ` Peter Xu
2021-08-10 19:59     ` Alex Williamson
2021-08-10 20:20       ` Peter Xu
2021-08-05 17:07 ` [PATCH 4/7] vfio,vfio-pci: Add vma to pfn callback Alex Williamson
2021-08-06  1:01   ` Jason Gunthorpe
2021-08-10  9:00     ` Christoph Hellwig
2021-08-10  9:00   ` Christoph Hellwig
2021-08-05 17:08 ` [PATCH 5/7] mm/interval_tree.c: Export vma interval tree iterators Alex Williamson
2021-08-05 17:08 ` [PATCH 6/7] vfio: Add vfio_device_io_remap_mapping_range() Alex Williamson
2021-08-10  9:04   ` Christoph Hellwig
2021-08-05 17:08 ` Alex Williamson [this message]
2021-08-10  9:11   ` [PATCH 7/7] vfio/pci: Remove map-on-fault behavior Christoph Hellwig
2021-08-10 15:04     ` Alex Williamson
2021-08-10 20:54   ` Peter Xu
2021-08-10 21:45     ` Alex Williamson
2021-08-10 22:27       ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=162818330190.1511194.10498114924408843888.stgit@omen \
    --to=alex.williamson@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterx@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).