linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Jordan <daniel.m.jordan@oracle.com>
To: Alexander Duyck <alexanderduyck@fb.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ben Segall <bsegall@google.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Ingo Molnar <mingo@redhat.com>, Jason Gunthorpe <jgg@nvidia.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Josh Triplett <josh@joshtriplett.org>,
	Michal Hocko <mhocko@suse.com>, Nico Pache <npache@redhat.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	Steve Sistare <steven.sistare@oracle.com>,
	Tejun Heo <tj@kernel.org>, Tim Chen <tim.c.chen@linux.intel.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: linux-mm@kvack.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org,
	Daniel Jordan <daniel.m.jordan@oracle.com>
Subject: [RFC 07/16] vfio/type1: Parallelize vfio_pin_map_dma()
Date: Wed,  5 Jan 2022 19:46:47 -0500	[thread overview]
Message-ID: <20220106004656.126790-8-daniel.m.jordan@oracle.com> (raw)
In-Reply-To: <20220106004656.126790-1-daniel.m.jordan@oracle.com>

The VFIO_IOMMU_MAP_DMA ioctl uses a single CPU to pin all pages in the
given range to facilitate DMA to/from the passed-through device.  The
pages may not have been faulted in and cleared, in which case the wall
time for this can be truly horrendous, but even if this was already done
(e.g. qemu prealloc), pinning pages for the largest guests still takes
significant time, even with recent optimizations to hugetlb gup[1] and
ioctl(VFIO_IOMMU_MAP_DMA) itself[2].

Parallelize with padata for faster guest initialization times.  Numbers
come later on.

[1] https://lore.kernel.org/linux-mm/20210128182632.24562-1-joao.m.martins@oracle.com
[2] https://lore.kernel.org/lkml/20210219161305.36522-1-daniel.m.jordan@oracle.com/

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/vfio/Kconfig            |  1 +
 drivers/vfio/vfio_iommu_type1.c | 95 +++++++++++++++++++++++++++------
 2 files changed, 80 insertions(+), 16 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 67d0bf4efa16..39c7efb7b1b1 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,6 +2,7 @@
 config VFIO_IOMMU_TYPE1
 	tristate
 	depends on VFIO
+	select PADATA
 	default n
 
 config VFIO_IOMMU_SPAPR_TCE
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8440e7e2c36d..faee849f1cce 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -40,6 +40,7 @@
 #include <linux/notifier.h>
 #include <linux/dma-iommu.h>
 #include <linux/irqdomain.h>
+#include <linux/padata.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -1488,24 +1489,44 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 	return ret;
 }
 
-static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
-			    size_t map_size)
+struct vfio_pin_args {
+	struct vfio_iommu *iommu;
+	struct vfio_dma *dma;
+	unsigned long limit;
+	struct mm_struct *mm;
+};
+
+static void vfio_pin_map_dma_undo(unsigned long start_vaddr,
+				  unsigned long end_vaddr, void *arg)
+{
+	struct vfio_pin_args *args = arg;
+	struct vfio_dma *dma = args->dma;
+	dma_addr_t iova = dma->iova + (start_vaddr - dma->vaddr);
+	dma_addr_t end  = dma->iova + (end_vaddr   - dma->vaddr);
+
+	vfio_unmap_unpin(args->iommu, args->dma, iova, end, true);
+}
+
+static int vfio_pin_map_dma_chunk(unsigned long start_vaddr,
+				  unsigned long end_vaddr, void *arg)
 {
-	dma_addr_t iova = dma->iova;
-	unsigned long vaddr = dma->vaddr;
+	struct vfio_pin_args *args = arg;
+	struct vfio_dma *dma = args->dma;
+	dma_addr_t iova = dma->iova + (start_vaddr - dma->vaddr);
+	unsigned long unmapped_size = end_vaddr - start_vaddr;
+	unsigned long pfn, mapped_size = 0;
 	struct vfio_batch batch;
-	size_t size = map_size;
 	long npage;
-	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret = 0;
 
 	vfio_batch_init(&batch);
 
-	while (size) {
+	while (unmapped_size) {
 		/* Pin a contiguous chunk of memory */
-		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn, limit,
-					      &batch, current->mm);
+		npage = vfio_pin_pages_remote(dma, start_vaddr + mapped_size,
+					      unmapped_size >> PAGE_SHIFT,
+					      &pfn, args->limit, &batch,
+					      args->mm);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1513,24 +1534,66 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		}
 
 		/* Map it! */
-		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
-				     dma->prot);
+		ret = vfio_iommu_map(args->iommu, iova + mapped_size, pfn,
+				     npage, dma->prot);
 		if (ret) {
-			vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
+			vfio_unpin_pages_remote(dma, iova + mapped_size, pfn,
 						npage, true);
 			vfio_batch_unpin(&batch, dma);
 			break;
 		}
 
-		size -= npage << PAGE_SHIFT;
-		dma->size += npage << PAGE_SHIFT;
+		unmapped_size -= npage << PAGE_SHIFT;
+		mapped_size   += npage << PAGE_SHIFT;
 	}
 
 	vfio_batch_fini(&batch);
+
+	/*
+	 * Undo the successfully completed part of this chunk now.  padata will
+	 * undo previously completed chunks internally at the end of the job.
+	 */
+	if (ret) {
+		vfio_pin_map_dma_undo(start_vaddr, start_vaddr + mapped_size,
+				      args);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Small-memory guests benefited from this relatively small value in testing. */
+#define VFIO_MIN_CHUNK		(1ul << 27)
+
+/* The sweet spot between performance and efficiency on the test machines. */
+#define VFIO_MAX_THREADS	16
+
+static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
+			    size_t map_size)
+{
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	int ret = 0;
+	struct vfio_pin_args args = { iommu, dma, limit, current->mm };
+	/* Stay on PMD boundary in case THP is being used. */
+	struct padata_mt_job job = {
+		.thread_fn   = vfio_pin_map_dma_chunk,
+		.fn_arg      = &args,
+		.start       = dma->vaddr,
+		.size        = map_size,
+		.align       = PMD_SIZE,
+		.min_chunk   = VFIO_MIN_CHUNK,
+		.undo_fn     = vfio_pin_map_dma_undo,
+		.max_threads = VFIO_MAX_THREADS,
+	};
+
+	ret = padata_do_multithreaded(&job);
+
 	dma->iommu_mapped = true;
 
 	if (ret)
-		vfio_remove_dma(iommu, dma);
+		vfio_remove_dma_finish(iommu, dma);
+	else
+		dma->size += map_size;
 
 	return ret;
 }
-- 
2.34.1


  parent reply	other threads:[~2022-01-06  0:49 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-06  0:46 [RFC 00/16] padata, vfio, sched: Multithreaded VFIO page pinning Daniel Jordan
2022-01-06  0:46 ` [RFC 01/16] padata: Remove __init from multithreading functions Daniel Jordan
2022-01-06  0:46 ` [RFC 02/16] padata: Return first error from a job Daniel Jordan
2022-01-06  0:46 ` [RFC 03/16] padata: Add undo support Daniel Jordan
2022-01-06  0:46 ` [RFC 04/16] padata: Detect deadlocks between main and helper threads Daniel Jordan
2022-01-06  0:46 ` [RFC 05/16] vfio/type1: Pass mm to vfio_pin_pages_remote() Daniel Jordan
2022-01-06  0:46 ` [RFC 06/16] vfio/type1: Refactor dma map removal Daniel Jordan
2022-01-06  0:46 ` Daniel Jordan [this message]
2022-01-06  0:46 ` [RFC 08/16] vfio/type1: Cache locked_vm to ease mmap_lock contention Daniel Jordan
2022-01-06  0:53   ` Jason Gunthorpe
2022-01-06  1:17     ` Daniel Jordan
2022-01-06 12:34       ` Jason Gunthorpe
2022-01-06 21:05         ` Alex Williamson
2022-01-07  0:19           ` Jason Gunthorpe
2022-01-07  3:06             ` Daniel Jordan
2022-01-07 15:18               ` Jason Gunthorpe
2022-01-07 16:39                 ` Daniel Jordan
2022-01-06  0:46 ` [RFC 09/16] padata: Use kthreads in do_multithreaded Daniel Jordan
2022-01-06  0:46 ` [RFC 10/16] padata: Helpers should respect main thread's CPU affinity Daniel Jordan
2022-01-06  0:46 ` [RFC 11/16] padata: Cap helpers started to online CPUs Daniel Jordan
2022-01-06  0:46 ` [RFC 12/16] sched, padata: Bound max threads with max_cfs_bandwidth_cpus() Daniel Jordan
2022-01-06  0:46 ` [RFC 13/16] padata: Run helper threads at MAX_NICE Daniel Jordan
2022-01-06  0:46 ` [RFC 14/16] padata: Nice helper threads one by one to prevent starvation Daniel Jordan
2022-01-06  0:46 ` [RFC 15/16] sched/fair: Account kthread runtime debt for CFS bandwidth Daniel Jordan
2022-01-11 11:58   ` Peter Zijlstra
2022-01-11 16:29     ` Daniel Jordan
2022-01-12 20:18       ` Tejun Heo
2022-01-13 21:08         ` Daniel Jordan
2022-01-13 21:11           ` Daniel Jordan
2022-01-14  9:31   ` Peter Zijlstra
2022-01-14  9:40     ` Peter Zijlstra
2022-01-14 16:38       ` Tejun Heo
2022-01-18 17:40       ` Daniel Jordan
2022-01-14 16:30     ` Tejun Heo
2022-01-18 17:32     ` Daniel Jordan
2022-01-06  0:46 ` [RFC 16/16] sched/fair: Consider kthread debt in cputime Daniel Jordan
2022-01-06  1:13 ` [RFC 00/16] padata, vfio, sched: Multithreaded VFIO page pinning Jason Gunthorpe
2022-01-07  3:03   ` Daniel Jordan
2022-01-07 17:12     ` Jason Gunthorpe
2022-01-10 22:27       ` Daniel Jordan
2022-01-11  0:17         ` Jason Gunthorpe
2022-01-11 16:20           ` Daniel Jordan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220106004656.126790-8-daniel.m.jordan@oracle.com \
    --to=daniel.m.jordan@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=alexanderduyck@fb.com \
    --cc=bsegall@google.com \
    --cc=cohuck@redhat.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hannes@cmpxchg.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=jgg@nvidia.com \
    --cc=josh@joshtriplett.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=npache@redhat.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=peterz@infradead.org \
    --cc=steffen.klassert@secunet.com \
    --cc=steven.sistare@oracle.com \
    --cc=tim.c.chen@linux.intel.com \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).