All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Sierra <alex.sierra@amd.com>
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 06/10] drm/amdkfd: skip invalid pages during migrations
Date: Mon, 21 Jun 2021 11:04:19 -0500	[thread overview]
Message-ID: <20210621160423.3517-6-alex.sierra@amd.com> (raw)
In-Reply-To: <20210621160423.3517-1-alex.sierra@amd.com>

Invalid pages can be the result of pages that have already been
migrated due to a copy-on-write procedure, or of pages that were never
migrated to VRAM in the first place. This is no longer an issue,
as pranges now support mixed memory domains (CPU/GPU).

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 38 +++++++++++-------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b298aa8dea4d..6fd68528c425 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -419,7 +419,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	size_t size;
 	void *buf;
 	int r = -ENOMEM;
-	int retry = 0;
 
 	memset(&migrate, 0, sizeof(migrate));
 	migrate.vma = vma;
@@ -438,7 +437,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	migrate.dst = migrate.src + npages;
 	scratch = (dma_addr_t *)(migrate.dst + npages);
 
-retry:
 	r = migrate_vma_setup(&migrate);
 	if (r) {
 		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
@@ -446,17 +444,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 		goto out_free;
 	}
 	if (migrate.cpages != npages) {
-		pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
+		pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
+			 migrate.cpages,
 			 npages);
-		migrate_vma_finalize(&migrate);
-		if (retry++ >= 3) {
-			r = -ENOMEM;
-			pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
-				 r, prange->svms, prange->start, prange->last);
-			goto out_free;
-		}
-
-		goto retry;
 	}
 
 	if (migrate.cpages) {
@@ -547,9 +537,8 @@ static void svm_migrate_page_free(struct page *page)
 static int
 svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 			struct migrate_vma *migrate, struct dma_fence **mfence,
-			dma_addr_t *scratch)
+			dma_addr_t *scratch, uint64_t npages)
 {
-	uint64_t npages = migrate->cpages;
 	struct device *dev = adev->dev;
 	uint64_t *src;
 	dma_addr_t *dst;
@@ -566,15 +555,23 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 	src = (uint64_t *)(scratch + npages);
 	dst = scratch;
 
-	for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+	for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
 		struct page *spage;
 
 		spage = migrate_pfn_to_page(migrate->src[i]);
-		if (!spage) {
-			pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+		if (!spage || !is_zone_device_page(spage)) {
+			pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
 				 prange->svms, prange->start, prange->last);
-			r = -ENOMEM;
-			goto out_oom;
+			if (j) {
+				r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+								 src + i - j, j,
+								 FROM_VRAM_TO_RAM,
+								 mfence);
+				if (r)
+					goto out_oom;
+				j = 0;
+			}
+			continue;
 		}
 		src[i] = svm_migrate_addr(adev, spage);
 		if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
@@ -607,6 +604,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 
 		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
 		migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+		j++;
 	}
 
 	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
@@ -664,7 +662,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	if (migrate.cpages) {
 		r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
-					    scratch);
+					    scratch, npages);
 		migrate_vma_pages(&migrate);
 		svm_migrate_copy_done(adev, mfence);
 		migrate_vma_finalize(&migrate);
-- 
2.32.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2021-06-21 16:04 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-21 16:04 [PATCH 01/10] drm/amdkfd: device pgmap owner at the svm migrate init Alex Sierra
2021-06-21 16:04 ` [PATCH 02/10] drm/amdkfd: add owner ref param to get hmm pages Alex Sierra
2021-06-21 20:00   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 03/10] drm/amdkfd: set owner ref to svm range prefault Alex Sierra
2021-06-21 20:02   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 04/10] drm/amdgpu: get owner ref in validate and map Alex Sierra
2021-06-21 20:04   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 05/10] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
2021-06-21 20:26   ` Felix Kuehling
2021-06-21 16:04 ` Alex Sierra [this message]
2021-06-21 16:04 ` [PATCH 07/10] drm/amdkfd: skip migration for pages already in VRAM Alex Sierra
2021-06-21 21:01   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 08/10] drm/amdkfd: add invalid pages debug at vram migration Alex Sierra
2021-06-21 21:02   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 09/10] drm/amdkfd: partially actual_loc removed Alex Sierra
2021-06-21 21:24   ` Felix Kuehling
2021-06-21 16:04 ` [PATCH 10/10] drm/amdkfd: protect svm_bo ref in case prange has forked Alex Sierra
2021-06-21 21:46   ` Felix Kuehling
2021-06-21 19:59 ` [PATCH 01/10] drm/amdkfd: device pgmap owner at the svm migrate init Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2021-05-27 20:55 Felix Kuehling
2021-05-27 20:56 ` [PATCH 06/10] drm/amdkfd: skip invalid pages during migrations Felix Kuehling
2021-05-27 20:56   ` Felix Kuehling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210621160423.3517-6-alex.sierra@amd.com \
    --to=alex.sierra@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.