All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init
@ 2021-05-05 21:49 Alex Sierra
  2021-05-05 21:50 ` [PATCH 2/3] drm/amdkfd: use dev_private_owner ref to get hmm pages Alex Sierra
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Alex Sierra @ 2021-05-05 21:49 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

pgmap owner member at the svm migrate init could be referenced
to either adev or hive, depending on device topology.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6b810863f6ba..21d92de235be 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -426,7 +426,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	migrate.start = start;
 	migrate.end = end;
 	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
-	migrate.pgmap_owner = adev;
+	migrate.pgmap_owner = (adev->hive)?(void *)adev->hive:(void *)adev;
 
 	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
 	size *= npages;
@@ -641,7 +641,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 	migrate.start = start;
 	migrate.end = end;
 	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-	migrate.pgmap_owner = adev;
+	migrate.pgmap_owner = (adev->hive)?(void *)adev->hive:(void *)adev;
 
 	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
 	size *= npages;
@@ -907,7 +907,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
 	pgmap->range.start = res->start;
 	pgmap->range.end = res->end;
 	pgmap->ops = &svm_migrate_pgmap_ops;
-	pgmap->owner = adev;
+	pgmap->owner = (adev->hive)?adev->hive:adev;
 	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
 	r = devm_memremap_pages(adev->dev, pgmap);
 	if (IS_ERR(r)) {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] drm/amdkfd: use dev_private_owner ref to get hmm pages
  2021-05-05 21:49 [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Alex Sierra
@ 2021-05-05 21:50 ` Alex Sierra
  2021-05-05 21:50 ` [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
  2021-05-05 22:36 ` [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Rodrigo Siqueira
  2 siblings, 0 replies; 6+ messages in thread
From: Alex Sierra @ 2021-05-05 21:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

This reference helps hmm to decide if device pages in the range
require to be migrated back to system memory, based if they are or
not in the same memory domain.
In this case, this reference could come from the same memory domain
with devices connected to the same hive.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  |  5 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 31 ++++++++++++++++++++++---
 4 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 2741c28ff1b5..b4ec33fdf4c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -160,7 +160,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 			       struct mm_struct *mm, struct page **pages,
 			       uint64_t start, uint64_t npages,
 			       struct hmm_range **phmm_range, bool readonly,
-			       bool mmap_locked)
+			       bool mmap_locked, void *owner)
 {
 	struct hmm_range *hmm_range;
 	unsigned long timeout;
@@ -185,6 +185,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 	hmm_range->hmm_pfns = pfns;
 	hmm_range->start = start;
 	hmm_range->end = start + npages * PAGE_SIZE;
+	hmm_range->dev_private_owner = owner;
 
 	/* Assuming 512MB takes maxmium 1 second to fault page address */
 	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
@@ -195,11 +196,11 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 
 	if (likely(!mmap_locked))
 		mmap_read_lock(mm);
-
 	r = hmm_range_fault(hmm_range);
 
 	if (likely(!mmap_locked))
 		mmap_read_unlock(mm);
+
 	if (unlikely(r)) {
 		/*
 		 * FIXME: This timeout should encompass the retry from
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 7f7d37a457c3..14a3c1864085 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 			       struct mm_struct *mm, struct page **pages,
 			       uint64_t start, uint64_t npages,
 			       struct hmm_range **phmm_range, bool readonly,
-			       bool mmap_locked);
+			       bool mmap_locked, void *owner);
 int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
 
 #if defined(CONFIG_HMM_MIRROR)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7e7d8330d64b..c13f7fbfc070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -709,7 +709,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	readonly = amdgpu_ttm_tt_is_readonly(ttm);
 	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
 				       ttm->num_pages, &gtt->range, readonly,
-				       false);
+				       false, NULL);
 out_putmm:
 	mmput(mm);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d9111fea724b..7683cb44cb9c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1304,6 +1304,16 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
 	ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
 }
 
+static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
+{
+	struct kfd_process_device *pdd;
+	struct amdgpu_device *adev;
+
+	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+	adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+	return (adev->hive)?(void *)adev->hive:(void *)adev;
+}
 /*
  * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
  *
@@ -1334,7 +1344,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 {
 	struct svm_validate_context ctx;
 	struct hmm_range *hmm_range;
+	void *owner;
+	struct kfd_process *p;
 	int r = 0;
+	int i = 0;
+	int32_t idx;
 
 	ctx.process = container_of(prange->svms, struct kfd_process, svms);
 	ctx.prange = prange;
@@ -1380,15 +1394,26 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	svm_range_reserve_bos(&ctx);
 
 	if (!prange->actual_loc) {
+
+		p = container_of(prange->svms, struct kfd_process, svms);
+		owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+							     MAX_GPU_INSTANCE));
+		for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+
+			if (kfd_svm_page_owner(p, idx) != owner) {
+				owner = NULL;
+				break;
+			}
+		}
+
 		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
 					       prange->start << PAGE_SHIFT,
 					       prange->npages, &hmm_range,
-					       false, true);
+					       false, true, owner);
 		if (r) {
 			pr_debug("failed %d to get svm range pages\n", r);
 			goto unreserve_out;
 		}
-
 		r = svm_range_dma_map(prange, ctx.bitmap,
 				      hmm_range->hmm_pfns);
 		if (r) {
@@ -2650,7 +2675,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
 	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
 				       prange->start << PAGE_SHIFT,
 				       prange->npages, &hmm_range,
-				       false, true);
+				       false, true, NULL);
 	if (!r) {
 		amdgpu_hmm_range_get_pages_done(hmm_range);
 		prange->validated_once = true;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU
  2021-05-05 21:49 [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Alex Sierra
  2021-05-05 21:50 ` [PATCH 2/3] drm/amdkfd: use dev_private_owner ref to get hmm pages Alex Sierra
@ 2021-05-05 21:50 ` Alex Sierra
  2021-05-05 22:36 ` [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Rodrigo Siqueira
  2 siblings, 0 replies; 6+ messages in thread
From: Alex Sierra @ 2021-05-05 21:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
svm ranges can have mixed pages from device or system memory.
A good example is, after a prange has been allocated in VRAM and a
copy-on-write is triggered by a fork. This invalidates some pages
inside the prange. Endding up in mixed pages.

[How]
By classifying each page inside a prange, based on its type. Device or
system memory, during dma mapping call. If page corresponds
to VRAM domain, a flag is set to its dma_addr entry for each GPU.
Then, at the GPU page table mapping. All group of contiguous pages within
the same type are mapped with their proper pte flags.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 58 +++++++++++++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7683cb44cb9c..045a961f7978 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -141,6 +141,12 @@ svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
 
 		page = hmm_pfn_to_page(hmm_pfns[i]);
+		if (is_zone_device_page(page)) {
+			addr[i] = hmm_pfns[i] << PAGE_SHIFT;
+			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
+			pr_debug("vram address detected: 0x%llx\n", addr[i] >> PAGE_SHIFT);
+			continue;
+		}
 		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
 		r = dma_mapping_error(dev, addr[i]);
 		if (r) {
@@ -1131,33 +1137,49 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
+	struct ttm_resource *ttm_res;
 	uint64_t pte_flags;
+	unsigned long last_start;
+	int last_domain;
 	int r = 0;
+	int64_t i, j;
 
 	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 		 prange->last);
 
-	if (prange->svm_bo && prange->ttm_res) {
+	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-		prange->mapping.bo_va = &bo_va;
-	}
-
-	prange->mapping.start = prange->start;
-	prange->mapping.last = prange->last;
-	prange->mapping.offset = prange->offset;
-	pte_flags = svm_range_get_pte_flags(adev, prange);
 
-	r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-					prange->mapping.start,
-					prange->mapping.last, pte_flags,
-					prange->mapping.offset,
-					prange->ttm_res ?
-						prange->ttm_res->mm_node : NULL,
-					dma_addr, &vm->last_update);
-	if (r) {
-		pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
-		goto out;
+	ttm_res = prange->ttm_res;
+	last_start = prange->start;
+	for (i = j = 0; i < prange->npages; i++) {
+		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
+		if ((last_start + j) < prange->last &&
+		    last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) {
+			j++;
+			continue;
+		}
+		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
+			 last_start, last_start + j, last_domain ? "GPU" : "CPU");
+		prange->ttm_res = last_domain ? ttm_res : NULL;
+		pte_flags = svm_range_get_pte_flags(adev, prange);
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+						last_start,
+						last_start + j, pte_flags,
+						((last_start - prange->start) << PAGE_SHIFT),
+						prange->ttm_res ?
+							prange->ttm_res->mm_node : NULL,
+						prange->ttm_res ? NULL :
+						dma_addr,
+						&vm->last_update);
+		if (r) {
+			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+			goto out;
+		}
+		last_start += j + 1;
+		j = 0;
 	}
+	prange->ttm_res = ttm_res;
 
 	r = amdgpu_vm_update_pdes(adev, vm, false);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 573f984b81fe..bb85288cd658 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -35,6 +35,8 @@
 #include "amdgpu.h"
 #include "kfd_priv.h"
 
+#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
+
 struct svm_range_bo {
 	struct amdgpu_bo		*bo;
 	struct kref			kref;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init
  2021-05-05 21:49 [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Alex Sierra
  2021-05-05 21:50 ` [PATCH 2/3] drm/amdkfd: use dev_private_owner ref to get hmm pages Alex Sierra
  2021-05-05 21:50 ` [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
@ 2021-05-05 22:36 ` Rodrigo Siqueira
  2 siblings, 0 replies; 6+ messages in thread
From: Rodrigo Siqueira @ 2021-05-05 22:36 UTC (permalink / raw)
  To: Alex Sierra; +Cc: amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2482 bytes --]

On 05/05, Alex Sierra wrote:
> pgmap owner member at the svm migrate init could be referenced
> to either adev or hive, depending on device topology.
> 
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 6b810863f6ba..21d92de235be 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -426,7 +426,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>  	migrate.start = start;
>  	migrate.end = end;
>  	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
> -	migrate.pgmap_owner = adev;
> +	migrate.pgmap_owner = (adev->hive)?(void *)adev->hive:(void *)adev;

I think you missed the spaces around "?" and ":" in this patch.

Thanks

>  
>  	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
>  	size *= npages;
> @@ -641,7 +641,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>  	migrate.start = start;
>  	migrate.end = end;
>  	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
> -	migrate.pgmap_owner = adev;
> +	migrate.pgmap_owner = (adev->hive)?(void *)adev->hive:(void *)adev;
>  
>  	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
>  	size *= npages;
> @@ -907,7 +907,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
>  	pgmap->range.start = res->start;
>  	pgmap->range.end = res->end;
>  	pgmap->ops = &svm_migrate_pgmap_ops;
> -	pgmap->owner = adev;
> +	pgmap->owner = (adev->hive)?adev->hive:adev;
>  	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
>  	r = devm_memremap_pages(adev->dev, pgmap);
>  	if (IS_ERR(r)) {
> -- 
> 2.17.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7CRodrigo.Siqueira%40amd.com%7Ccd057517621142c86d6d08d9100fd033%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637558482375385783%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=j2z4VJ8ZGKim3R5BxIOLNRwjj%2B6yanIKNd5JQp738P8%3D&amp;reserved=0

-- 
Rodrigo Siqueira
https://siqueira.tech

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU
  2021-05-05 22:59 ` [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
@ 2021-05-06  0:48   ` Felix Kuehling
  0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-05-06  0:48 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx


Am 2021-05-05 um 6:59 p.m. schrieb Alex Sierra:
> [Why]
> svm ranges can have mixed pages from device or system memory.
> A good example is, after a prange has been allocated in VRAM and a
> copy-on-write is triggered by a fork. This invalidates some pages
> inside the prange. Endding up in mixed pages.
>
> [How]
> By classifying each page inside a prange, based on its type. Device or
> system memory, during dma mapping call. If page corresponds
> to VRAM domain, a flag is set to its dma_addr entry for each GPU.
> Then, at the GPU page table mapping. All group of contiguous pages within
> the same type are mapped with their proper pte flags.
>
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 58 +++++++++++++++++++---------
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
>  2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 7104762a0119..94fe9ab70e94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -141,6 +141,12 @@ svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
>  			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
>  
>  		page = hmm_pfn_to_page(hmm_pfns[i]);
> +		if (is_zone_device_page(page)) {
> +			addr[i] = hmm_pfns[i] << PAGE_SHIFT;

This is not the real physical address. It's a fake address from
devm_request_free_mem_region. You need to translate that to an actual
physical address by subtracting pgmap->range.start and adding
adev->vm_manager.vram_base_offset.

Right now it looks like you don't use this address for VRAM at all. You
calculate the address from the ttm_res in the svm_range. This works OK
as long as we do the VRAM allocation per SVM range. Eventually it may be
useful to calculate the correct address here from the PFN. Then we can
be more flexible in the VRAM management and may not need a reference to
the BO in the svm_range at all any more.


> +			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
> +			pr_debug("vram address detected: 0x%llx\n", addr[i] >> PAGE_SHIFT);
> +			continue;
> +		}
>  		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
>  		r = dma_mapping_error(dev, addr[i]);
>  		if (r) {
> @@ -1131,33 +1137,49 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
>  {
>  	struct amdgpu_bo_va bo_va;
> +	struct ttm_resource *ttm_res;
>  	uint64_t pte_flags;
> +	unsigned long last_start;
> +	int last_domain;
>  	int r = 0;
> +	int64_t i, j;
>  
>  	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
>  		 prange->last);
>  
> -	if (prange->svm_bo && prange->ttm_res) {
> +	if (prange->svm_bo && prange->ttm_res)
>  		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
> -		prange->mapping.bo_va = &bo_va;
> -	}
> -
> -	prange->mapping.start = prange->start;
> -	prange->mapping.last = prange->last;
> -	prange->mapping.offset = prange->offset;
> -	pte_flags = svm_range_get_pte_flags(adev, prange);
>  
> -	r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
> -					prange->mapping.start,
> -					prange->mapping.last, pte_flags,
> -					prange->mapping.offset,
> -					prange->ttm_res ?
> -						prange->ttm_res->mm_node : NULL,
> -					dma_addr, &vm->last_update);
> -	if (r) {
> -		pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
> -		goto out;
> +	ttm_res = prange->ttm_res;

This is not needed here. See below.


> +	last_start = prange->start;
> +	for (i = j = 0; i < prange->npages; i++) {
> +		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
> +		if ((last_start + j) < prange->last &&

I think this should be true: prange->start + i == last_start + j. So you
don't need j.


> +		    last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) {
> +			j++;
> +			continue;
> +		}
> +		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
> +			 last_start, last_start + j, last_domain ? "GPU" : "CPU");
> +		prange->ttm_res = last_domain ? ttm_res : NULL;

You're using prange->ttm_res as a temporary variable. Use the local
variable ttm_res as your temporary. Then this becomes:

    ttm_res = last_domain ? prange->ttm_res : NULL;

Then use ttm_res instead of prange->ttm_res below.


> +		pte_flags = svm_range_get_pte_flags(adev, prange);
> +		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
> +						last_start,
> +						last_start + j, pte_flags,

This could also use prange->start + i.


> +						((last_start - prange->start) << PAGE_SHIFT),

I think you still need to add prange->offset here. Otherwise you break
the case where one BO is shared by several ranges.


> +						prange->ttm_res ?
> +							prange->ttm_res->mm_node : NULL,

This looks like you need to rebase on Christian's latest changes.
amdgpu_vm_bo_update_mapping now takes a ttm_res parameter.


> +						prange->ttm_res ? NULL :
> +						dma_addr,

This line-break is really jarring. Put dma_addr on the same line as the
rest of the ternary expression.


> +						&vm->last_update);
> +		if (r) {
> +			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
> +			goto out;
> +		}
> +		last_start += j + 1;

last_start = prange->start + i + 1;


> +		j = 0;
>  	}
> +	prange->ttm_res = ttm_res;

Remove this. See my comment about making ttm_res the temporary variable.
We should not misuse our data structures for temporary variables.

Regards,
  Felix


>  
>  	r = amdgpu_vm_update_pdes(adev, vm, false);
>  	if (r) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 573f984b81fe..bb85288cd658 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -35,6 +35,8 @@
>  #include "amdgpu.h"
>  #include "kfd_priv.h"
>  
> +#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
> +
>  struct svm_range_bo {
>  	struct amdgpu_bo		*bo;
>  	struct kref			kref;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU
  2021-05-05 22:59 Alex Sierra
@ 2021-05-05 22:59 ` Alex Sierra
  2021-05-06  0:48   ` Felix Kuehling
  0 siblings, 1 reply; 6+ messages in thread
From: Alex Sierra @ 2021-05-05 22:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
svm ranges can have mixed pages from device or system memory.
A good example is, after a prange has been allocated in VRAM and a
copy-on-write is triggered by a fork. This invalidates some pages
inside the prange. Endding up in mixed pages.

[How]
By classifying each page inside a prange, based on its type. Device or
system memory, during dma mapping call. If page corresponds
to VRAM domain, a flag is set to its dma_addr entry for each GPU.
Then, at the GPU page table mapping. All group of contiguous pages within
the same type are mapped with their proper pte flags.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 58 +++++++++++++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7104762a0119..94fe9ab70e94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -141,6 +141,12 @@ svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
 
 		page = hmm_pfn_to_page(hmm_pfns[i]);
+		if (is_zone_device_page(page)) {
+			addr[i] = hmm_pfns[i] << PAGE_SHIFT;
+			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
+			pr_debug("vram address detected: 0x%llx\n", addr[i] >> PAGE_SHIFT);
+			continue;
+		}
 		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
 		r = dma_mapping_error(dev, addr[i]);
 		if (r) {
@@ -1131,33 +1137,49 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
+	struct ttm_resource *ttm_res;
 	uint64_t pte_flags;
+	unsigned long last_start;
+	int last_domain;
 	int r = 0;
+	int64_t i, j;
 
 	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 		 prange->last);
 
-	if (prange->svm_bo && prange->ttm_res) {
+	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-		prange->mapping.bo_va = &bo_va;
-	}
-
-	prange->mapping.start = prange->start;
-	prange->mapping.last = prange->last;
-	prange->mapping.offset = prange->offset;
-	pte_flags = svm_range_get_pte_flags(adev, prange);
 
-	r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-					prange->mapping.start,
-					prange->mapping.last, pte_flags,
-					prange->mapping.offset,
-					prange->ttm_res ?
-						prange->ttm_res->mm_node : NULL,
-					dma_addr, &vm->last_update);
-	if (r) {
-		pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
-		goto out;
+	ttm_res = prange->ttm_res;
+	last_start = prange->start;
+	for (i = j = 0; i < prange->npages; i++) {
+		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
+		if ((last_start + j) < prange->last &&
+		    last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) {
+			j++;
+			continue;
+		}
+		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
+			 last_start, last_start + j, last_domain ? "GPU" : "CPU");
+		prange->ttm_res = last_domain ? ttm_res : NULL;
+		pte_flags = svm_range_get_pte_flags(adev, prange);
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+						last_start,
+						last_start + j, pte_flags,
+						((last_start - prange->start) << PAGE_SHIFT),
+						prange->ttm_res ?
+							prange->ttm_res->mm_node : NULL,
+						prange->ttm_res ? NULL :
+						dma_addr,
+						&vm->last_update);
+		if (r) {
+			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+			goto out;
+		}
+		last_start += j + 1;
+		j = 0;
 	}
+	prange->ttm_res = ttm_res;
 
 	r = amdgpu_vm_update_pdes(adev, vm, false);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 573f984b81fe..bb85288cd658 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -35,6 +35,8 @@
 #include "amdgpu.h"
 #include "kfd_priv.h"
 
+#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
+
 struct svm_range_bo {
 	struct amdgpu_bo		*bo;
 	struct kref			kref;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-05-06  0:48 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-05 21:49 [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Alex Sierra
2021-05-05 21:50 ` [PATCH 2/3] drm/amdkfd: use dev_private_owner ref to get hmm pages Alex Sierra
2021-05-05 21:50 ` [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
2021-05-05 22:36 ` [PATCH 1/3] drm/amdkfd: device pgmap owner at the svm migrate init Rodrigo Siqueira
2021-05-05 22:59 Alex Sierra
2021-05-05 22:59 ` [PATCH 3/3] drm/amdkfd: classify and map mixed svm range pages in GPU Alex Sierra
2021-05-06  0:48   ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.