From: Ralph Campbell <rcampbell@nvidia.com>
To: <nouveau@lists.freedesktop.org>, <linux-rdma@vger.kernel.org>,
	<linux-mm@kvack.org>, <linux-kselftest@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: Jerome Glisse <jglisse@redhat.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Christoph Hellwig <hch@lst.de>,
	Jason Gunthorpe <jgg@mellanox.com>,
	"Ben Skeggs" <bskeggs@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Shuah Khan <shuah@kernel.org>,
	Ralph Campbell <rcampbell@nvidia.com>
Subject: [PATCH 16/16] nouveau: support THP migration to private memory
Date: Fri, 19 Jun 2020 14:56:49 -0700
Message-ID: <20200619215649.32297-17-rcampbell@nvidia.com>
In-Reply-To: <20200619215649.32297-1-rcampbell@nvidia.com>

Add support for migrating transparent huge pages to and from device
private memory: device memory chunks can now be handed out as PMD-order
compound pages (tracked on a separate free_huge_pages list), the copy
engine is programmed with a byte length instead of a per-page count, and
nouveau_pfns_map() takes a page shift so huge migrations are mapped as
huge GPU pages.
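
For reviewers, here is a condensed sketch of the new CPU fault path, with
names taken from the diff below (DMA mapping, locking and error handling
are omitted, so treat it as an illustration rather than a drop-in):

  /*
   * Sketch only: when the faulting device-private page is part of a
   * compound (THP) page, widen the migrate_vma window to the whole
   * PMD-sized block and allocate one src/dst slot per base page.
   * alloc_transhugepage() is introduced earlier in this series.
   */
  page = compound_head(vmf->page);
  order = compound_order(page);
  if (order) {
          args.start &= PAGE_MASK << order;       /* huge page boundary */
          args.end = args.start + (PAGE_SIZE << order);
          args.src = kcalloc(1U << order, sizeof(*args.src), GFP_KERNEL);
          args.dst = kcalloc(1U << order, sizeof(*args.dst), GFP_KERNEL);
  }
  /*
   * migrate_vma_setup()/migrate_vma_pages()/migrate_vma_finalize() then
   * run once for the whole block, and the copy transfers page_size(spage)
   * bytes in a single DMA instead of one PAGE_SIZE copy per base page.
   */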

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 171 +++++++++++++++++--------
 drivers/gpu/drm/nouveau/nouveau_svm.c  |  11 +-
 drivers/gpu/drm/nouveau/nouveau_svm.h  |   3 +-
 3 files changed, 127 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index f6a806ba3caa..e8c4c0bc78ae 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -46,6 +46,7 @@
  */
 #define DMEM_CHUNK_SIZE (2UL << 20)
 #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
 
 enum nouveau_aper {
 	NOUVEAU_APER_VIRT,
@@ -53,7 +54,7 @@ enum nouveau_aper {
 	NOUVEAU_APER_HOST,
 };
 
-typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
+typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u32 length,
 				      enum nouveau_aper, u64 dst_addr,
 				      enum nouveau_aper, u64 src_addr);
 typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length,
@@ -79,6 +80,7 @@ struct nouveau_dmem {
 	struct list_head chunks;
 	struct mutex mutex;
 	struct page *free_pages;
+	struct page *free_huge_pages;
 	spinlock_t lock;
 };
 
@@ -109,8 +111,13 @@ static void nouveau_dmem_page_free(struct page *page)
 	struct nouveau_dmem *dmem = chunk->drm->dmem;
 
 	spin_lock(&dmem->lock);
-	page->zone_device_data = dmem->free_pages;
-	dmem->free_pages = page;
+	if (PageHuge(page)) {
+		page->zone_device_data = dmem->free_huge_pages;
+		dmem->free_huge_pages = page;
+	} else {
+		page->zone_device_data = dmem->free_pages;
+		dmem->free_pages = page;
+	}
 
 	WARN_ON(!chunk->callocated);
 	chunk->callocated--;
@@ -136,33 +143,41 @@ static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
 
 static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
 		struct vm_fault *vmf, struct migrate_vma *args,
-		dma_addr_t *dma_addr)
+		dma_addr_t *dma_addr, size_t *sizep)
 {
 	struct device *dev = drm->dev->dev;
 	struct page *dpage, *spage;
+	unsigned int order;
 
 	spage = migrate_pfn_to_page(args->src[0]);
 	if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
 		return 0;
 
-	dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
+	order = compound_order(spage);
+	if (order)
+		dpage = alloc_transhugepage(vmf->vma, vmf->address);
+	else
+		dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
 	if (!dpage)
 		return VM_FAULT_SIGBUS;
+	WARN_ON_ONCE(order != compound_order(dpage));
 	lock_page(dpage);
 
-	*dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	*sizep = page_size(dpage);
+	*dma_addr = dma_map_page(dev, dpage, 0, *sizep, DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(dev, *dma_addr))
 		goto error_free_page;
 
-	if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
-			NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
+	if (drm->dmem->migrate.copy_func(drm, page_size(spage),
+			NOUVEAU_APER_HOST, *dma_addr, NOUVEAU_APER_VRAM,
+			nouveau_dmem_page_addr(spage)))
 		goto error_dma_unmap;
 
 	args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
 	return 0;
 
 error_dma_unmap:
-	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	dma_unmap_page(dev, *dma_addr, page_size(dpage), DMA_BIDIRECTIONAL);
 error_free_page:
 	__free_page(dpage);
 	return VM_FAULT_SIGBUS;
@@ -173,8 +188,11 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 	struct nouveau_drm *drm = page_to_drm(vmf->page);
 	struct nouveau_dmem *dmem = drm->dmem;
 	struct nouveau_fence *fence;
+	struct page *page;
+	unsigned int order;
 	unsigned long src = 0, dst = 0;
 	dma_addr_t dma_addr = 0;
+	size_t size = 0;
 	vm_fault_t ret;
 	struct migrate_vma args = {
 		.vma		= vmf->vma,
@@ -185,26 +203,52 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 		.src_owner	= drm->dev,
 	};
 
+	/*
+	 * If the page was migrated to the GPU as a huge page, migrate it
+	 * back the same way.
+	 * FIXME If there is thrashing, maybe we should migrate one page.
+	 */
+	page = compound_head(vmf->page);
+	order = compound_order(page);
+	if (order) {
+		args.start &= PAGE_MASK << order;
+		args.end = args.start + (PAGE_SIZE << order);
+		args.src = kcalloc(1U << order, sizeof(*args.src), GFP_KERNEL);
+		if (!args.src)
+			return VM_FAULT_OOM;
+		args.dst = kcalloc(1U << order, sizeof(*args.dst), GFP_KERNEL);
+		if (!args.dst) {
+			ret = VM_FAULT_OOM;
+			goto error_src;
+		}
+	}
+
 	/*
 	 * FIXME what we really want is to find some heuristic to migrate more
 	 * than just one page on CPU fault. When such fault happens it is very
 	 * likely that more surrounding page will CPU fault too.
 	 */
-	if (migrate_vma_setup(&args) < 0)
-		return VM_FAULT_SIGBUS;
-	if (!args.cpages)
-		return 0;
+	if (migrate_vma_setup(&args) < 0) {
+		ret = VM_FAULT_SIGBUS;
+		goto error_dst;
+	}
 
-	ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
-	if (ret || dst == 0)
+	ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr, &size);
+	if (ret)
 		goto done;
 
 	nouveau_fence_new(dmem->migrate.chan, false, &fence);
 	migrate_vma_pages(&args);
 	nouveau_dmem_fence_done(&fence);
-	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	dma_unmap_page(drm->dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
 done:
 	migrate_vma_finalize(&args);
+error_dst:
+	if (args.dst != &dst)
+		kfree(args.dst);
+error_src:
+	if (args.src != &src)
+		kfree(args.src);
 	return ret;
 }
 
@@ -213,8 +257,8 @@ static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
 	.migrate_to_ram		= nouveau_dmem_migrate_to_ram,
 };
 
-static int
-nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
+static int nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, bool is_huge,
+				    struct page **ppage)
 {
 	struct nouveau_dmem_chunk *chunk;
 	struct resource *res;
@@ -266,16 +310,20 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 	pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
 	page = pfn_to_page(pfn_first);
 	spin_lock(&drm->dmem->lock);
-	for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
-		page->zone_device_data = drm->dmem->free_pages;
-		drm->dmem->free_pages = page;
-	}
+	if (is_huge)
+		prep_compound_page(page, PMD_ORDER);
+	else
+		for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
+			page->zone_device_data = drm->dmem->free_pages;
+			drm->dmem->free_pages = page;
+		}
 	*ppage = page;
 	chunk->callocated++;
 	spin_unlock(&drm->dmem->lock);
 
-	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n",
-		DMEM_CHUNK_SIZE >> 20);
+	NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n",
+		DMEM_CHUNK_SIZE >> 20, is_huge ? "huge " : "", pfn_first,
+		nouveau_dmem_page_addr(page));
 
 	return 0;
 
@@ -293,14 +341,20 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 }
 
 static struct page *
-nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
+nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_huge)
 {
 	struct nouveau_dmem_chunk *chunk;
 	struct page *page = NULL;
 	int ret;
 
 	spin_lock(&drm->dmem->lock);
-	if (drm->dmem->free_pages) {
+	if (is_huge && drm->dmem->free_huge_pages) {
+		page = drm->dmem->free_huge_pages;
+		drm->dmem->free_huge_pages = page->zone_device_data;
+		chunk = nouveau_page_to_chunk(page);
+		chunk->callocated++;
+		spin_unlock(&drm->dmem->lock);
+	} else if (!is_huge && drm->dmem->free_pages) {
 		page = drm->dmem->free_pages;
 		drm->dmem->free_pages = page->zone_device_data;
 		chunk = nouveau_page_to_chunk(page);
@@ -308,7 +362,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
 		spin_unlock(&drm->dmem->lock);
 	} else {
 		spin_unlock(&drm->dmem->lock);
-		ret = nouveau_dmem_chunk_alloc(drm, &page);
+		ret = nouveau_dmem_chunk_alloc(drm, is_huge, &page);
 		if (ret)
 			return NULL;
 	}
@@ -381,19 +435,18 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
 }
 
 static int
-nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
+nvc0b5_migrate_copy(struct nouveau_drm *drm, u32 length,
 		    enum nouveau_aper dst_aper, u64 dst_addr,
 		    enum nouveau_aper src_aper, u64 src_addr)
 {
 	struct nouveau_channel *chan = drm->dmem->migrate.chan;
-	u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
-			 (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
+	u32 launch_dma = (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
 			 (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
 			 (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
 			 (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
 	int ret;
 
-	ret = RING_SPACE(chan, 13);
+	ret = RING_SPACE(chan, 11);
 	if (ret)
 		return ret;
 
@@ -425,17 +478,15 @@ nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
 		launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
 	}
 
-	BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
-	OUT_RING  (chan, upper_32_bits(src_addr));
-	OUT_RING  (chan, lower_32_bits(src_addr));
-	OUT_RING  (chan, upper_32_bits(dst_addr));
-	OUT_RING  (chan, lower_32_bits(dst_addr));
-	OUT_RING  (chan, PAGE_SIZE);
-	OUT_RING  (chan, PAGE_SIZE);
-	OUT_RING  (chan, PAGE_SIZE);
-	OUT_RING  (chan, npages);
+	BEGIN_NVC0(chan, NvSubCopy, 0x0400, 4);
+	OUT_RING(chan, upper_32_bits(src_addr));
+	OUT_RING(chan, lower_32_bits(src_addr));
+	OUT_RING(chan, upper_32_bits(dst_addr));
+	OUT_RING(chan, lower_32_bits(dst_addr));
+	BEGIN_NVC0(chan, NvSubCopy, 0x0418, 1);
+	OUT_RING(chan, length);
 	BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
-	OUT_RING  (chan, launch_dma);
+	OUT_RING(chan, launch_dma);
 	return 0;
 }
 
@@ -535,6 +586,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 	struct device *dev = drm->dev->dev;
 	struct page *dpage, *spage;
 	unsigned long paddr;
+	unsigned long dst;
 
 	spage = migrate_pfn_to_page(src);
 	if (!(src & MIGRATE_PFN_MIGRATE))
@@ -546,7 +598,8 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 		goto done;
 	}
 
-	dpage = nouveau_dmem_page_alloc_locked(drm);
+	dpage = nouveau_dmem_page_alloc_locked(drm,
+					       src & MIGRATE_PFN_COMPOUND);
 	if (!dpage)
 		goto out;
 
@@ -556,7 +609,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 					 DMA_BIDIRECTIONAL);
 		if (dma_mapping_error(dev, *dma_addr))
 			goto out_free_page;
-		if (drm->dmem->migrate.copy_func(drm, 1,
+		if (drm->dmem->migrate.copy_func(drm, page_size(spage),
 			NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr))
 			goto out_dma_unmap;
 	} else {
@@ -571,10 +624,13 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 		((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
 	if (src & MIGRATE_PFN_WRITE)
 		*pfn |= NVIF_VMM_PFNMAP_V0_W;
-	return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	if (PageHead(dpage))
+		dst |= MIGRATE_PFN_COMPOUND;
+	return dst;
 
 out_dma_unmap:
-	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	dma_unmap_page(dev, *dma_addr, page_size(spage), DMA_BIDIRECTIONAL);
 out_free_page:
 	nouveau_dmem_page_free_locked(drm, dpage);
 out:
@@ -588,24 +644,30 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
 {
 	struct nouveau_fence *fence;
 	unsigned long addr = args->start, nr_dma = 0, i;
+	unsigned int page_shift = PAGE_SHIFT;
 
 	for (i = 0; addr < args->end; i++) {
 		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
 				dma_addrs + nr_dma, pfns + i);
 		if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma]))
 			nr_dma++;
+		if (args->dst[i] & MIGRATE_PFN_COMPOUND) {
+			page_shift = PMD_SHIFT;
+			i++;
+			break;
+		}
 		addr += PAGE_SIZE;
 	}
 
 	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
 	migrate_vma_pages(args);
 	nouveau_dmem_fence_done(&fence);
-	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
+	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i,
+			 page_shift);
 
-	while (nr_dma--) {
-		dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
-				DMA_BIDIRECTIONAL);
-	}
+	while (nr_dma)
+		dma_unmap_page(drm->dev->dev, dma_addrs[--nr_dma],
+				1UL << page_shift, DMA_BIDIRECTIONAL);
 	migrate_vma_finalize(args);
 }
 
@@ -617,7 +679,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 			 unsigned long end)
 {
 	unsigned long npages = (end - start) >> PAGE_SHIFT;
-	unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
+	unsigned long max = min(1UL << PMD_ORDER, npages);
 	dma_addr_t *dma_addrs;
 	struct migrate_vma args = {
 		.vma		= vma,
@@ -646,8 +708,10 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 	if (!pfns)
 		goto out_free_dma;
 
-	for (i = 0; i < npages; i += max) {
-		args.end = start + (max << PAGE_SHIFT);
+	for (; args.start < end; args.start = args.end) {
+		args.end = ALIGN(args.start, PMD_SIZE);
+		if (args.start == args.end)
+			args.end = min(end, args.start + PMD_SIZE);
 		ret = migrate_vma_setup(&args);
 		if (ret)
 			goto out_free_pfns;
@@ -655,7 +719,6 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 		if (args.cpages)
 			nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs,
 						   pfns);
-		args.start = args.end;
 	}
 
 	ret = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index a27625f3c5f9..f386a9318190 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -684,7 +684,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
 			continue;
 		}
-		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
 
 		/* We try and group handling of faults within a small
 		 * window into a single update.
@@ -736,6 +735,10 @@ nouveau_svm_fault(struct nvif_notify *notify)
 		}
 		mmput(mm);
 
+		SVMM_DBG(svmm, "addr %llx %s %c", buffer->fault[fi]->addr,
+			args.phys[0] & NVIF_VMM_PFNMAP_V0_VRAM ?
+			"vram" : "sysmem",
+			args.i.p.size > PAGE_SIZE ? 'H' : 'N');
 		limit = args.i.p.addr + args.i.p.size;
 		for (fn = fi; ++fn < buffer->fault_nr; ) {
 			/* It's okay to skip over duplicate addresses from the
@@ -807,13 +810,15 @@ nouveau_pfns_free(u64 *pfns)
 
 void
 nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
-		 unsigned long addr, u64 *pfns, unsigned long npages)
+		 unsigned long addr, u64 *pfns, unsigned long npages,
+		 unsigned int page_shift)
 {
 	struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
 	int ret;
 
 	args->p.addr = addr;
-	args->p.size = npages << PAGE_SHIFT;
+	args->p.page = page_shift;
+	args->p.size = npages << args->p.page;
 
 	mutex_lock(&svmm->mutex);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h
index f0fcd1b72e8b..ba5927e445ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.h
@@ -22,7 +22,8 @@ int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *);
 u64 *nouveau_pfns_alloc(unsigned long npages);
 void nouveau_pfns_free(u64 *pfns);
 void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
-		      unsigned long addr, u64 *pfns, unsigned long npages);
+		      unsigned long addr, u64 *pfns, unsigned long npages,
+		      unsigned int page_shift);
 #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 static inline void nouveau_svm_init(struct nouveau_drm *drm) {}
 static inline void nouveau_svm_fini(struct nouveau_drm *drm) {}
-- 
2.20.1

