From: Steve Sistare <steven.sistare@oracle.com>
To: kvm@vger.kernel.org
Cc: Alex Williamson <alex.williamson@redhat.com>,
Cornelia Huck <cohuck@redhat.com>,
Kirti Wankhede <kwankhede@nvidia.com>,
Steve Sistare <steven.sistare@oracle.com>
Subject: [PATCH V1 4/5] vfio: VA suspend interface
Date: Tue, 5 Jan 2021 07:36:52 -0800 [thread overview]
Message-ID: <1609861013-129801-5-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1609861013-129801-1-git-send-email-steven.sistare@oracle.com>
Add interfaces that allow the underlying memory object of an iova
range to be mapped to a new host virtual address in the host process:
- VFIO_DMA_UNMAP_FLAG_SUSPEND for VFIO_IOMMU_UNMAP_DMA
- VFIO_DMA_MAP_FLAG_RESUME flag for VFIO_IOMMU_MAP_DMA
- VFIO_SUSPEND extension for VFIO_CHECK_EXTENSION
The suspend interface blocks vfio translation of host virtual
addresses in a range, but DMA to already-mapped pages continues.
The resume interface records the new base VA and resumes translation.
See comments in uapi/linux/vfio.h for more details.
This is a partial implementation. Blocking is added in the next patch.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
drivers/vfio/vfio_iommu_type1.c | 47 +++++++++++++++++++++++++++++++++++------
include/uapi/linux/vfio.h | 16 ++++++++++++++
2 files changed, 57 insertions(+), 6 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3dc501d..2c164a6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -92,6 +92,7 @@ struct vfio_dma {
int prot; /* IOMMU_READ/WRITE */
bool iommu_mapped;
bool lock_cap; /* capable(CAP_IPC_LOCK) */
+ bool suspended;
struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */
unsigned long *bitmap;
@@ -1080,7 +1081,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
int ret = 0, retries = 0;
unsigned long pgshift;
dma_addr_t iova;
- unsigned long size;
+ unsigned long size, consumed;
mutex_lock(&iommu->lock);
@@ -1169,6 +1170,21 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (dma->task->mm != current->mm)
break;
+ if (unmap->flags & VFIO_DMA_UNMAP_FLAG_SUSPEND) {
+ if (dma->suspended) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ dma->suspended = true;
+ consumed = dma->iova + dma->size - iova;
+ if (consumed >= size)
+ break;
+ iova += consumed;
+ size -= consumed;
+ unmapped += dma->size;
+ continue;
+ }
+
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
struct vfio_iommu_type1_dma_unmap nb_unmap;
@@ -1307,6 +1323,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
static int vfio_dma_do_map(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_map *map)
{
+ bool resume = map->flags & VFIO_DMA_MAP_FLAG_RESUME;
dma_addr_t iova = map->iova;
unsigned long vaddr = map->vaddr;
size_t size = map->size;
@@ -1324,13 +1341,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
+ if ((prot && resume) || (!prot && !resume))
+ return -EINVAL;
+
mutex_lock(&iommu->lock);
pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
WARN_ON((pgsize - 1) & PAGE_MASK);
- if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) {
+ if (!size || (size | iova | vaddr) & (pgsize - 1)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -1341,7 +1361,19 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
goto out_unlock;
}
- if (vfio_find_dma(iommu, iova, size)) {
+ dma = vfio_find_dma(iommu, iova, size);
+ if (resume) {
+ if (!dma) {
+ ret = -ENOENT;
+ } else if (!dma->suspended || dma->iova != iova ||
+ dma->size != size) {
+ ret = -EINVAL;
+ } else {
+ dma->vaddr = vaddr;
+ dma->suspended = false;
+ }
+ goto out_unlock;
+ } else if (dma) {
ret = -EEXIST;
goto out_unlock;
}
@@ -2532,6 +2564,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
case VFIO_TYPE1_IOMMU:
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_NESTING_IOMMU:
+ case VFIO_SUSPEND:
return 1;
case VFIO_DMA_CC_IOMMU:
if (!iommu)
@@ -2686,7 +2719,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
{
struct vfio_iommu_type1_dma_map map;
unsigned long minsz;
- uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_RESUME;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
@@ -2704,6 +2738,8 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
{
struct vfio_iommu_type1_dma_unmap unmap;
struct vfio_bitmap bitmap = { 0 };
+ uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP |
+ VFIO_DMA_UNMAP_FLAG_SUSPEND;
unsigned long minsz;
int ret;
@@ -2712,8 +2748,7 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
if (copy_from_user(&unmap, (void __user *)arg, minsz))
return -EFAULT;
- if (unmap.argsz < minsz ||
- unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)
+ if (unmap.argsz < minsz || unmap.flags & ~mask || unmap.flags == mask)
return -EINVAL;
if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 896e527..fcf7b56 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -46,6 +46,9 @@
*/
#define VFIO_NOIOMMU_IOMMU 8
+/* Supports VFIO DMA suspend and resume */
+#define VFIO_SUSPEND 9
+
/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between
@@ -1046,12 +1049,19 @@ struct vfio_iommu_type1_info_cap_migration {
*
* Map process virtual addresses to IO virtual addresses using the
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+ * If flags & VFIO_DMA_MAP_FLAG_RESUME, record the new base vaddr for iova, and
+ * resume translation of host virtual addresses in the iova range. The new
+ * vaddr must point to the same memory object as the old vaddr, but this is not
+ * verified. iova and size must match those in the original MAP_DMA call.
+ * Protection is not changed, and the READ & WRITE flags must be 0.
*/
struct vfio_iommu_type1_dma_map {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+#define VFIO_DMA_MAP_FLAG_RESUME (1 << 2)
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
@@ -1084,11 +1094,17 @@ struct vfio_bitmap {
* indicates that the page at that offset from iova is dirty. A Bitmap of the
* pages in the range of unmapped size is returned in the user-provided
* vfio_bitmap.data.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_SUSPEND, do not unmap, but suspend vfio
+ * translation of host virtual addresses in the iova range. During suspension,
+ * kernel threads that attempt to translate will block. DMA to already-mapped
+ * pages continues.
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
+#define VFIO_DMA_UNMAP_FLAG_SUSPEND (1 << 1)
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
__u8 data[];
--
1.8.3.1
next prev parent reply other threads:[~2021-01-05 16:05 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-05 15:36 [PATCH V1 0/5] vfio virtual address update Steve Sistare
2021-01-05 15:36 ` [PATCH V1 1/5] vfio: maintain dma_list order Steve Sistare
2021-01-05 18:48 ` kernel test robot
2021-01-06 0:02 ` Alex Williamson
2021-01-06 14:50 ` Steven Sistare
2021-01-05 15:36 ` [PATCH V1 2/5] vfio: option to unmap all Steve Sistare
2021-01-08 19:35 ` Alex Williamson
2021-01-11 21:09 ` Steven Sistare
2021-01-13 19:41 ` Alex Williamson
2021-01-05 15:36 ` [PATCH V1 3/5] vfio: detect closed container Steve Sistare
2021-01-08 19:39 ` Alex Williamson
2021-01-11 21:12 ` Steven Sistare
2021-01-13 19:26 ` Alex Williamson
2021-01-13 19:44 ` Steven Sistare
2021-01-13 19:44 ` Alex Williamson
2021-01-05 15:36 ` Steve Sistare [this message]
2021-01-08 21:15 ` [PATCH V1 4/5] vfio: VA suspend interface Alex Williamson
2021-01-11 21:15 ` Steven Sistare
2021-01-12 15:47 ` Jason Zeng
2021-01-12 22:09 ` Alex Williamson
2021-01-13 4:37 ` Jason Zeng
2021-01-12 22:47 ` Alex Williamson
2021-01-13 4:10 ` Alex Williamson
2021-01-13 18:02 ` Steven Sistare
2021-01-13 18:34 ` Alex Williamson
2021-01-13 18:01 ` Steven Sistare
2021-01-19 20:11 ` Steven Sistare
2021-01-05 15:36 ` [PATCH V1 5/5] vfio: block during VA suspend Steve Sistare
2021-01-05 18:08 ` kernel test robot
2021-01-05 20:03 ` kernel test robot
2021-01-07 15:17 ` Steven Sistare
2021-01-05 20:03 ` [RFC PATCH] vfio: vfio_vaddr_valid() can be static kernel test robot
2021-01-08 21:32 ` [PATCH V1 5/5] vfio: block during VA suspend Alex Williamson
2021-01-11 21:16 ` Steven Sistare
2021-01-12 21:52 ` [PATCH V1 0/5] vfio virtual address update Steven Sistare
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1609861013-129801-5-git-send-email-steven.sistare@oracle.com \
--to=steven.sistare@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=cohuck@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=kwankhede@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).