All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ankur Arora <ankur.a.arora@oracle.com>
To: bbhushan2@marvell.com, alex.williamson@redhat.com
Cc: linux-kernel@vger.kernel.org, sgoutham@marvell.com,
	ankur.a.arora@oracle.com, terminus@gmail.com
Subject: Re: vfio-pci: protect remap_pfn_range() from simultaneous calls
Date: Wed, 20 Jan 2021 20:39:07 -0800	[thread overview]
Message-ID: <20210121043907.76037-1-ankur.a.arora@oracle.com> (raw)
In-Reply-To: <MWHPR18MB1520D43B19368D8D84360248E3A31@MWHPR18MB1520.namprd18.prod.outlook.com>

Hi Bharat,

So I don't have a final patch for you, but can you test the one below
the scissors mark? (The patch is correct, but I'm not happy that it
introduces a new lock.)

Ankur

-- >8 --

Date: Thu, 21 Jan 2021 00:04:36 +0000
Subject: vfio-pci: protect io_remap_pfn_range() from simultaneous calls

A fix for CVE-2020-12888 changed the mmap to be dynamic so it gets
zapped out and faulted back in based on the state of the PCI_COMMAND
register.

The original code flow was:

  vfio_iommu_type1::vfio_device_mmap()
    vfio_pci::vfio_pci_mmap()
      remap_pfn_range(vma->vm_start .. vma->vm_end);

  vfio_iommu_type1::vfio_iommu_type1_ioctl(VFIO_PIN_MAP_DMA)
    get_user_pages_remote()
    iommu_map()

Which became:

  vfio_iommu_type1::vfio_device_mmap()
    vfio_pci::vfio_pci_mmap()
     /* Setup vm->vm_ops */

  vfio_iommu_type1::vfio_iommu_type1_ioctl(VFIO_PIN_MAP_DMA)
    get_user_pages_remote()
      follow_fault_pfn(vma, vaddr); /* For missing pages */
        fixup_user_fault()
          handle_mm_fault()
            vfio_pci::vfio_pci_mmap_fault()
              io_remap_pfn_range(vma->vm_start .. vma->vm_end);
    iommu_map()

With this, simultaneous calls to VFIO_PIN_MAP_DMA for an overlapping
region, would mean potentially multiple calls to io_remap_pfn_range()
-- each of which try to remap the full extent.

This ends up hitting BUG_ON(!pte_none(*pte)) in remap_pte_range()
because we are mapping an already mapped pte.

The fix is to elide calls to io_remap_pfn_range() if the VMA is already
mapped.

Fixes: abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access
on disabled memory")

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 drivers/vfio/pci/vfio_pci.c         | 48 ++++++++++++++++++++++++++++++++++---
 drivers/vfio/pci/vfio_pci_private.h |  2 ++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 706de3ef94bb..db7a2a716f51 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -64,6 +64,11 @@ static bool disable_denylist;
 module_param(disable_denylist, bool, 0444);
 MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");
 
+struct vdev_vma_priv {
+	struct vfio_pci_device *vdev;
+	bool vma_mapped;
+};
+
 static inline bool vfio_vga_disabled(void)
 {
 #ifdef CONFIG_VFIO_PCI_VGA
@@ -1527,15 +1532,20 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
 		list_for_each_entry_safe(mmap_vma, tmp,
 					 &vdev->vma_list, vma_next) {
 			struct vm_area_struct *vma = mmap_vma->vma;
+			struct vdev_vma_priv *p;
 
 			if (vma->vm_mm != mm)
 				continue;
 
 			list_del(&mmap_vma->vma_next);
 			kfree(mmap_vma);
+			p = vma->vm_private_data;
 
+			mutex_lock(&vdev->map_lock);
 			zap_vma_ptes(vma, vma->vm_start,
 				     vma->vm_end - vma->vm_start);
+			p->vma_mapped = false;
+			mutex_unlock(&vdev->map_lock);
 		}
 		mutex_unlock(&vdev->vma_lock);
 		mmap_read_unlock(mm);
@@ -1591,12 +1601,19 @@ static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
  */
 static void vfio_pci_mmap_open(struct vm_area_struct *vma)
 {
+	struct vdev_vma_priv *p = vma->vm_private_data;
+	struct vfio_pci_device *vdev = p->vdev;
+
+	mutex_lock(&vdev->map_lock);
+	p->vma_mapped = false;
 	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	mutex_unlock(&vdev->map_lock);
 }
 
 static void vfio_pci_mmap_close(struct vm_area_struct *vma)
 {
-	struct vfio_pci_device *vdev = vma->vm_private_data;
+	struct vdev_vma_priv *p = vma->vm_private_data;
+	struct vfio_pci_device *vdev = p->vdev;
 	struct vfio_pci_mmap_vma *mmap_vma;
 
 	mutex_lock(&vdev->vma_lock);
@@ -1604,6 +1621,7 @@ static void vfio_pci_mmap_close(struct vm_area_struct *vma)
 		if (mmap_vma->vma == vma) {
 			list_del(&mmap_vma->vma_next);
 			kfree(mmap_vma);
+			kfree(p);
 			break;
 		}
 	}
@@ -1613,7 +1631,8 @@ static void vfio_pci_mmap_close(struct vm_area_struct *vma)
 static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_device *vdev = vma->vm_private_data;
+	struct vdev_vma_priv *p = vma->vm_private_data;
+	struct vfio_pci_device *vdev = p->vdev;
 	vm_fault_t ret = VM_FAULT_NOPAGE;
 
 	mutex_lock(&vdev->vma_lock);
@@ -1633,10 +1652,24 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
 
 	mutex_unlock(&vdev->vma_lock);
 
+	/*
+	 * The vdev->map_lock in vfio_pci_zap_and_vma_lock() nests
+	 * inside the vdev->vma_lock but doesn't depend on that for
+	 * protection of the VMA.
+	 * So take vdev->map_lock after releasing vdev->vma_lock.
+	 */
+	mutex_lock(&vdev->map_lock);
+	if (p->vma_mapped)
+		goto unlock_out;
+
 	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			       vma->vm_end - vma->vm_start, vma->vm_page_prot))
 		ret = VM_FAULT_SIGBUS;
+	else
+		p->vma_mapped = true;
 
+unlock_out:
+	mutex_unlock(&vdev->map_lock);
 up_out:
 	up_read(&vdev->memory_lock);
 	return ret;
@@ -1654,6 +1687,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned int index;
 	u64 phys_len, req_len, pgoff, req_start;
+	struct vdev_vma_priv *priv;
 	int ret;
 
 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
@@ -1702,7 +1736,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 		}
 	}
 
-	vma->vm_private_data = vdev;
+	priv = kzalloc(sizeof(struct vdev_vma_priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->vdev = vdev;
+	priv->vma_mapped = false;
+
+	vma->vm_private_data = priv;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
 
@@ -1967,6 +2008,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&vdev->dummy_resources_list);
 	INIT_LIST_HEAD(&vdev->ioeventfds_list);
 	mutex_init(&vdev->vma_lock);
+	mutex_init(&vdev->map_lock);
 	INIT_LIST_HEAD(&vdev->vma_list);
 	init_rwsem(&vdev->memory_lock);
 
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 5c90e560c5c7..f3010c49b06c 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -142,6 +142,8 @@ struct vfio_pci_device {
 	struct mutex		vma_lock;
 	struct list_head	vma_list;
 	struct rw_semaphore	memory_lock;
+	/* Protects VMA against simultaneous remaps. */
+	struct mutex		map_lock;
 };
 
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
-- 
2.9.3


  reply	other threads:[~2021-01-21  4:42 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-06 16:17 vfio-pci: protect remap_pfn_range() from simultaneous calls Bharat Bhushan
2021-01-06 18:13 ` Ankur Arora
2021-01-07  4:57   ` [EXT] " Bharat Bhushan
2021-01-19  8:51   ` Bharat Bhushan
2021-01-21  4:39     ` Ankur Arora [this message]
2021-02-26  0:53       ` Ankur Arora
2021-03-02 12:47         ` [EXT] " Bharat Bhushan
2021-03-08  6:59           ` Ankur Arora
2021-03-08  7:03             ` Bharat Bhushan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210121043907.76037-1-ankur.a.arora@oracle.com \
    --to=ankur.a.arora@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=bbhushan2@marvell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sgoutham@marvell.com \
    --cc=terminus@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.