* [PATCH 1/4] drm/amdgpu: partial revert VM sync changes
From: Christian König @ 2020-04-07 14:27 UTC
  To: amd-gfx, Felix.Kuehling, Alex.Sierra

We still need to add the VM update fences to the root PD.

So make sure to never sync to those implicitly.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index b86392253696..b87ca171986a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -249,6 +249,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
 			continue;
 
+		/* Never sync to VM updates either. */
+		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+			continue;
+
 		/* Ignore fences depending on the sync mode */
 		switch (mode) {
 		case AMDGPU_SYNC_ALWAYS:
-- 
2.17.1


* [PATCH 2/4] drm/amdgpu: cleanup IB pool handling a bit
From: Christian König @ 2020-04-07 14:27 UTC
  To: amd-gfx, Felix.Kuehling, Alex.Sierra

Fix the coding style, move and rename the definitions to
better match what they are supposed to be doing.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      | 65 +++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h    | 13 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_test.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 10 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c     |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c     |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c      |  3 +-
 10 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 32bc2a882fd2..912671f5a665 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -192,8 +192,6 @@ extern int amdgpu_cik_support;
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
-/* AMDGPU_IB_POOL_SIZE must be a power of 2 */
-#define AMDGPU_IB_POOL_SIZE			16
 #define AMDGPU_DEBUGFS_MAX_COMPONENTS		32
 #define AMDGPUFB_CONN_LIMIT			4
 #define AMDGPU_BIOS_NUM_SCRATCH			16
@@ -390,13 +388,6 @@ struct amdgpu_sa_bo {
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
-enum amdgpu_ib_pool_type {
-	AMDGPU_IB_POOL_NORMAL = 0,
-	AMDGPU_IB_POOL_VM,
-	AMDGPU_IB_POOL_DIRECT,
-
-	AMDGPU_IB_POOL_MAX
-};
 /*
  * IRQS.
  */
@@ -857,7 +848,7 @@ struct amdgpu_device {
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
-	struct amdgpu_sa_manager	ring_tmp_bo[AMDGPU_IB_POOL_MAX];
+	struct amdgpu_sa_manager	ib_pools[AMDGPU_IB_POOL_MAX];
 	struct amdgpu_sched		gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
 
 	/* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 79dd4ed8146b..1f16b0ff3953 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -921,7 +921,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		ring = to_amdgpu_ring(entity->rq->sched);
 		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
-				   chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_NORMAL, ib);
+				   chunk_ib->ib_bytes : 0,
+				   AMDGPU_IB_POOL_DELAYED, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 77eaaba927ee..ec2c5e164cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -61,14 +61,13 @@
  * Returns 0 on success, error on failure.
  */
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		unsigned size,
-		enum amdgpu_ib_pool_type pool_type,
-		struct amdgpu_ib *ib)
+		  unsigned size, enum amdgpu_ib_pool_type pool_type,
+		  struct amdgpu_ib *ib)
 {
 	int r;
 
 	if (size) {
-		r = amdgpu_sa_bo_new(&adev->ring_tmp_bo[pool_type],
+		r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
 				      &ib->sa_bo, size, 256);
 		if (r) {
 			dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -302,30 +301,32 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
  */
 int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 {
-	int r, i;
 	unsigned size;
+	int r, i;
 
-	if (adev->ib_pool_ready) {
+	if (adev->ib_pool_ready)
 		return 0;
-	}
+
 	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
 		if (i == AMDGPU_IB_POOL_DIRECT)
 			size = PAGE_SIZE * 2;
 		else
-			size = AMDGPU_IB_POOL_SIZE*64*1024;
-		r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo[i],
-				size,
-				AMDGPU_GPU_PAGE_SIZE,
-				AMDGPU_GEM_DOMAIN_GTT);
-		if (r) {
-			for (i--; i >= 0; i--)
-				amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]);
-			return r;
-		}
+			size = AMDGPU_IB_POOL_SIZE;
+
+		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
+					      size, AMDGPU_GPU_PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_GTT);
+		if (r)
+			goto error;
 	}
 	adev->ib_pool_ready = true;
 
 	return 0;
+
+error:
+	while (i--)
+		amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+	return r;
 }
 
 /**
@@ -340,11 +341,12 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
 	int i;
 
-	if (adev->ib_pool_ready) {
-		for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
-			amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]);
-		adev->ib_pool_ready = false;
-	}
+	if (!adev->ib_pool_ready)
+		return;
+
+	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+		amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+	adev->ib_pool_ready = false;
 }
 
 /**
@@ -359,9 +361,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
  */
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
-	unsigned i;
-	int r, ret = 0;
 	long tmo_gfx, tmo_mm;
+	int r, ret = 0;
+	unsigned i;
 
 	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
 	if (amdgpu_sriov_vf(adev)) {
@@ -439,15 +441,16 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 
-	seq_printf(m, "-------------------- NORMAL -------------------- \n");
-	amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_NORMAL], m);
-	seq_printf(m, "---------------------- VM ---------------------- \n");
-	amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_VM], m);
-	seq_printf(m, "-------------------- DIRECT--------------------- \n");
-	amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_DIRECT], m);
+	seq_printf(m, "--------------------- DELAYED --------------------- \n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
+				     m);
+	seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
+				     m);
+	seq_printf(m, "--------------------- DIRECT ---------------------- \n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
 
 	return 0;
-
 }
 
 static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index bfd634aedf26..efd7627b3f69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -50,6 +50,8 @@
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
+#define AMDGPU_IB_POOL_SIZE	(1024 * 1024)
+
 enum amdgpu_ring_type {
 	AMDGPU_RING_TYPE_GFX		= AMDGPU_HW_IP_GFX,
 	AMDGPU_RING_TYPE_COMPUTE	= AMDGPU_HW_IP_COMPUTE,
@@ -63,6 +65,17 @@ enum amdgpu_ring_type {
 	AMDGPU_RING_TYPE_KIQ
 };
 
+enum amdgpu_ib_pool_type {
+	/* Normal submissions to the top of the pipeline. */
+	AMDGPU_IB_POOL_DELAYED,
+	/* Immediate submissions to the bottom of the pipeline. */
+	AMDGPU_IB_POOL_IMMEDIATE,
+	/* Direct submission to the ring buffer during init and reset. */
+	AMDGPU_IB_POOL_DIRECT,
+
+	AMDGPU_IB_POOL_MAX
+};
+
 struct amdgpu_device;
 struct amdgpu_ring;
 struct amdgpu_ib;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 476f1f89aaad..2f4d5ca9894f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -44,7 +44,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 	/* Number of tests =
 	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 */
-	n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
+	n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE;
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		if (adev->rings[i])
 			n -= adev->rings[i]->ring_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1b658e905620..153af5bdbec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -337,7 +337,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	num_bytes = num_pages * 8;
 
 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
-				     AMDGPU_IB_POOL_NORMAL, &job);
+				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
 
@@ -2127,6 +2127,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       struct dma_fence **fence, bool direct_submit,
 		       bool vm_needs_flush, bool tmz)
 {
+	enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
+		AMDGPU_IB_POOL_DELAYED;
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
 
@@ -2144,8 +2146,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
-			direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
 	if (r)
 		return r;
 
@@ -2234,7 +2235,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	/* for IB padding */
 	num_dw += 64;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_NORMAL, &job);
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+				     &job);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 550282d9c1fc..5100ebe8858d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1056,8 +1056,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 			goto err;
 	}
 
-	r = amdgpu_job_alloc_with_ib(adev, 64,
-			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
+	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
+				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		goto err;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index d090455282e5..ecaa2d7483b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -447,7 +447,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
@@ -526,7 +526,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
+				     direct ? AMDGPU_IB_POOL_DIRECT :
+				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index fbd451f3559a..b96c8d9a1946 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -61,11 +61,12 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 				  struct dma_resv *resv,
 				  enum amdgpu_sync_mode sync_mode)
 {
+	enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE :
+		AMDGPU_IB_POOL_DELAYED;
 	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
-			p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
+	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
 	if (r)
 		return r;
 
@@ -199,6 +200,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 				 uint64_t addr, unsigned count, uint32_t incr,
 				 uint64_t flags)
 {
+	enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE :
+		AMDGPU_IB_POOL_DELAYED;
 	unsigned int i, ndw, nptes;
 	uint64_t *pte;
 	int r;
@@ -224,8 +227,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
 			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
 
-			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
-					p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
+			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
+						     &p->job);
 			if (r)
 				return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 78d769e13643..0655cd4f77a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -369,7 +369,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself.
 	 */
-	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_VM, &job);
+	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+				     &job);
 	if (r)
 		goto error_alloc;
 
-- 
2.17.1


* [PATCH 3/4] drm/amdgpu: rename direct to immediate for VM updates
From: Christian König @ 2020-04-07 14:27 UTC
  To: amd-gfx, Felix.Kuehling, Alex.Sierra

To avoid confusion with direct ring submissions, rename bottom-of-pipe
VM table changes to immediate updates.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c     |  6 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 60 ++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      | 10 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 23 ++++----
 5 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 3a67f6c046d4..fe92dcd94d4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	    !dma_fence_is_later(updates, (*id)->flushed_updates))
 	    updates = NULL;
 
-	if ((*id)->owner != vm->direct.fence_context ||
+	if ((*id)->owner != vm->immediate.fence_context ||
 	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
 	    updates || !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
@@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		struct dma_fence *flushed;
 
 		/* Check all the prerequisites to using this VMID */
-		if ((*id)->owner != vm->direct.fence_context)
+		if ((*id)->owner != vm->immediate.fence_context)
 			continue;
 
 		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
@@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 
 	id->pd_gpu_addr = job->vm_pd_addr;
-	id->owner = vm->direct.fence_context;
+	id->owner = vm->immediate.fence_context;
 
 	if (job->vm_needs_flush) {
 		dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3b2e74841136..1ee87b460b96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -726,7 +726,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
- * @direct: use a direct update
+ * @immediate: use an immediate update
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -736,7 +736,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_vm *vm,
 			      struct amdgpu_bo *bo,
-			      bool direct)
+			      bool immediate)
 {
 	struct ttm_operation_ctx ctx = { true, false };
 	unsigned level = adev->vm_manager.root_level;
@@ -795,7 +795,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
@@ -850,11 +850,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @vm: requesting vm
  * @level: the page table level
- * @direct: use a direct update
+ * @immediate: use an immediate update
  * @bp: resulting BO allocation parameters
  */
 static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			       int level, bool direct,
+			       int level, bool immediate,
 			       struct amdgpu_bo_param *bp)
 {
 	memset(bp, 0, sizeof(*bp));
@@ -870,7 +870,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
 		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
 	bp->type = ttm_bo_type_kernel;
-	bp->no_wait_gpu = direct;
+	bp->no_wait_gpu = immediate;
 	if (vm->root.base.bo)
 		bp->resv = vm->root.base.bo->tbo.base.resv;
 }
@@ -881,7 +881,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * @adev: amdgpu_device pointer
  * @vm: VM to allocate page tables for
  * @cursor: Which page table to allocate
- * @direct: use a direct update
+ * @immediate: use an immediate update
  *
  * Make sure a specific page table or directory is allocated.
  *
@@ -892,7 +892,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
 			       struct amdgpu_vm_pt_cursor *cursor,
-			       bool direct)
+			       bool immediate)
 {
 	struct amdgpu_vm_pt *entry = cursor->entry;
 	struct amdgpu_bo_param bp;
@@ -913,7 +913,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	if (entry->base.bo)
 		return 0;
 
-	amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
+	amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
 
 	r = amdgpu_bo_create(adev, &bp, &pt);
 	if (r)
@@ -925,7 +925,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
 	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-	r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
+	r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
 	if (r)
 		goto error_free_pt;
 
@@ -1276,7 +1276,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @direct: submit directly to the paging queue
+ * @immediate: submit immediately to the paging queue
  *
  * Makes sure all directories are up to date.
  *
@@ -1284,7 +1284,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
  * 0 for success, error for failure.
  */
 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm, bool direct)
+			  struct amdgpu_vm *vm, bool immediate)
 {
 	struct amdgpu_vm_update_params params;
 	int r;
@@ -1295,7 +1295,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
@@ -1451,7 +1451,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 			 * address range are actually allocated
 			 */
 			r = amdgpu_vm_alloc_pts(params->adev, params->vm,
-						&cursor, params->direct);
+						&cursor, params->immediate);
 			if (r)
 				return r;
 		}
@@ -1557,7 +1557,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @direct: direct submission in a page fault
+ * @immediate: immediate submission in a page fault
  * @resv: fences we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
@@ -1572,7 +1572,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct amdgpu_vm *vm, bool direct,
+				       struct amdgpu_vm *vm, bool immediate,
 				       struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t addr,
@@ -1586,7 +1586,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 	params.pages_addr = pages_addr;
 
 	/* Implicitly sync to command submissions in the same VM before
@@ -1606,8 +1606,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
 		struct amdgpu_bo *root = vm->root.base.bo;
 
-		if (!dma_fence_is_signaled(vm->last_direct))
-			amdgpu_bo_fence(root, vm->last_direct, true);
+		if (!dma_fence_is_signaled(vm->last_immediate))
+			amdgpu_bo_fence(root, vm->last_immediate, true);
 	}
 
 	r = vm->update_funcs->prepare(&params, resv, sync_mode);
@@ -2589,7 +2589,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
-	if (!dma_fence_is_signaled(bo_base->vm->last_direct)) {
+	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
 		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
@@ -2766,7 +2766,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 	if (timeout <= 0)
 		return timeout;
 
-	return dma_fence_wait_timeout(vm->last_direct, true, timeout);
+	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
 }
 
 /**
@@ -2802,7 +2802,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 
 	/* create scheduler entities for page table updates */
-	r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
 				  adev->vm_manager.vm_pte_scheds,
 				  adev->vm_manager.vm_pte_num_scheds, NULL);
 	if (r)
@@ -2812,7 +2812,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				  adev->vm_manager.vm_pte_scheds,
 				  adev->vm_manager.vm_pte_num_scheds, NULL);
 	if (r)
-		goto error_free_direct;
+		goto error_free_immediate;
 
 	vm->pte_support_ats = false;
 	vm->is_compute_context = false;
@@ -2838,7 +2838,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	vm->last_update = NULL;
-	vm->last_direct = dma_fence_get_stub();
+	vm->last_immediate = dma_fence_get_stub();
 
 	mutex_init(&vm->eviction_lock);
 	vm->evicting = false;
@@ -2892,11 +2892,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->root.base.bo = NULL;
 
 error_free_delayed:
-	dma_fence_put(vm->last_direct);
+	dma_fence_put(vm->last_immediate);
 	drm_sched_entity_destroy(&vm->delayed);
 
-error_free_direct:
-	drm_sched_entity_destroy(&vm->direct);
+error_free_immediate:
+	drm_sched_entity_destroy(&vm->immediate);
 
 	return r;
 }
@@ -3093,8 +3093,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		vm->pasid = 0;
 	}
 
-	dma_fence_wait(vm->last_direct, false);
-	dma_fence_put(vm->last_direct);
+	dma_fence_wait(vm->last_immediate, false);
+	dma_fence_put(vm->last_immediate);
 
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
@@ -3111,7 +3111,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unref(&root);
 	WARN_ON(vm->root.base.bo);
 
-	drm_sched_entity_destroy(&vm->direct);
+	drm_sched_entity_destroy(&vm->immediate);
 	drm_sched_entity_destroy(&vm->delayed);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b52efc591160..0b64200ec45f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -206,9 +206,9 @@ struct amdgpu_vm_update_params {
 	struct amdgpu_vm *vm;
 
 	/**
-	 * @direct: if changes should be made directly
+	 * @immediate: if changes should be made immediately
 	 */
-	bool direct;
+	bool immediate;
 
 	/**
 	 * @pages_addr:
@@ -274,11 +274,11 @@ struct amdgpu_vm {
 	struct dma_fence	*last_update;
 
 	/* Scheduler entities for page table updates */
-	struct drm_sched_entity	direct;
+	struct drm_sched_entity	immediate;
 	struct drm_sched_entity	delayed;
 
 	/* Last submission to the scheduler entities */
-	struct dma_fence	*last_direct;
+	struct dma_fence	*last_immediate;
 
 	unsigned int		pasid;
 	/* dedicated to vm */
@@ -383,7 +383,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      void *param);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm, bool direct);
+			  struct amdgpu_vm *vm, bool immediate);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e38516304070..39c704a1fb0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -84,7 +84,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
 
 	pe += (unsigned long)amdgpu_bo_kptr(bo);
 
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
 
 	for (i = 0; i < count; i++) {
 		value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index b96c8d9a1946..c78bcebd9378 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -61,8 +61,8 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 				  struct dma_resv *resv,
 				  enum amdgpu_sync_mode sync_mode)
 {
-	enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE :
-		AMDGPU_IB_POOL_DELAYED;
+	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+		: AMDGPU_IB_POOL_DELAYED;
 	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
 	int r;
 
@@ -96,7 +96,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	struct amdgpu_ring *ring;
 	int r;
 
-	entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
 	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
 
 	WARN_ON(ib->length_dw == 0);
@@ -106,15 +106,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	if (r)
 		goto error;
 
-	if (p->direct) {
+	if (p->immediate) {
 		tmp = dma_fence_get(f);
-		swap(p->vm->last_direct, tmp);
+		swap(p->vm->last_immediate, f);
 		dma_fence_put(tmp);
 	} else {
-		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, f);
+		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
+					  f);
 	}
 
-	if (fence && !p->direct)
+	if (fence && !p->immediate)
 		swap(*fence, f);
 	dma_fence_put(f);
 	return 0;
@@ -144,7 +145,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
 	src += p->num_dw_left * 4;
 
 	pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
-	trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
+	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
 
 	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
 }
@@ -171,7 +172,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
 	struct amdgpu_ib *ib = p->job->ibs;
 
 	pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
 	if (count < 3) {
 		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
 				    count, incr);
@@ -200,8 +201,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 				 uint64_t addr, unsigned count, uint32_t incr,
 				 uint64_t flags)
 {
-	enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE :
-		AMDGPU_IB_POOL_DELAYED;
+	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+		: AMDGPU_IB_POOL_DELAYED;
 	unsigned int i, ndw, nptes;
 	uint64_t *pte;
 	int r;
-- 
2.17.1


* [PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
From: Christian König @ 2020-04-07 14:27 UTC
  To: amd-gfx, Felix.Kuehling, Alex.Sierra

For HMM support we need the ability to invalidate PTEs from
an MM callback where we can't lock the root PD.

Add a new flag to better support this instead of assuming
that all invalidation updates are unlocked.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42 ++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
 3 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1ee87b460b96..4ca4f61b34ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		uint64_t incr, entry_end, pe_start;
 		struct amdgpu_bo *pt;
 
-		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
+		if (!params->unlocked) {
 			/* make sure that the page tables covering the
 			 * address range are actually allocated
 			 */
@@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 
 		shift = amdgpu_vm_level_shift(adev, cursor.level);
 		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
-		if (adev->asic_type < CHIP_VEGA10 &&
-		    (flags & AMDGPU_PTE_VALID)) {
+		if (params->unlocked) {
+			/* Unlocked updates are only allowed on the leaves */
+			if (amdgpu_vm_pt_descendant(adev, &cursor))
+				continue;
+		} else if (adev->asic_type < CHIP_VEGA10 &&
+			   (flags & AMDGPU_PTE_VALID)) {
 			/* No huge page support before GMC v9 */
 			if (cursor.level != AMDGPU_VM_PTB) {
 				if (!amdgpu_vm_pt_descendant(adev, &cursor))
@@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  * @immediate: immediate submission in a page fault
+ * @unlocked: unlocked invalidation during an MM callback
  * @resv: fences we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
@@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct amdgpu_vm *vm, bool immediate,
-				       struct dma_resv *resv,
+				       bool unlocked, struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t addr,
 				       dma_addr_t *pages_addr,
@@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_unlock;
 	}
 
-	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-		struct amdgpu_bo *root = vm->root.base.bo;
+	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
+		struct dma_fence *tmp = dma_fence_get_stub();
 
-		if (!dma_fence_is_signaled(vm->last_immediate))
-			amdgpu_bo_fence(root, vm->last_immediate, true);
+		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
+		swap(vm->last_unlocked, tmp);
+		dma_fence_put(tmp);
 	}
 
 	r = vm->update_funcs->prepare(&params, resv, sync_mode);
@@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
 						start, last, flags, addr,
 						dma_addr, fence);
 		if (r)
@@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
 						mapping->start, mapping->last,
 						init_pte_value, 0, NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
-	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
+	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
 		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
@@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 	if (timeout <= 0)
 		return timeout;
 
-	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
+	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
 }
 
 /**
@@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	vm->last_update = NULL;
-	vm->last_immediate = dma_fence_get_stub();
+	vm->last_unlocked = dma_fence_get_stub();
 
 	mutex_init(&vm->eviction_lock);
 	vm->evicting = false;
@@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->root.base.bo = NULL;
 
 error_free_delayed:
-	dma_fence_put(vm->last_immediate);
+	dma_fence_put(vm->last_unlocked);
 	drm_sched_entity_destroy(&vm->delayed);
 
 error_free_immediate:
@@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		vm->pasid = 0;
 	}
 
-	dma_fence_wait(vm->last_immediate, false);
-	dma_fence_put(vm->last_immediate);
+	dma_fence_wait(vm->last_unlocked, false);
+	dma_fence_put(vm->last_unlocked);
 
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
@@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
 		value = 0;
 	}
 
-	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
-					flags, value, NULL, NULL);
+	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
+					addr + 1, flags, value, NULL, NULL);
 	if (r)
 		goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0b64200ec45f..ea771d84bf2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
 	 */
 	bool immediate;
 
+	/**
+	 * @unlocked: true if the root BO is not locked
+	 */
+	bool unlocked;
+
 	/**
 	 * @pages_addr:
 	 *
@@ -277,8 +282,8 @@ struct amdgpu_vm {
 	struct drm_sched_entity	immediate;
 	struct drm_sched_entity	delayed;
 
-	/* Last submission to the scheduler entities */
-	struct dma_fence	*last_immediate;
+	/* Last unlocked submission to the scheduler entities */
+	struct dma_fence	*last_unlocked;
 
 	unsigned int		pasid;
 	/* dedicated to vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index c78bcebd9378..8d9c6feba660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 {
 	struct amdgpu_ib *ib = p->job->ibs;
 	struct drm_sched_entity *entity;
-	struct dma_fence *f, *tmp;
 	struct amdgpu_ring *ring;
+	struct dma_fence *f;
 	int r;
 
 	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
@@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	if (r)
 		goto error;
 
-	if (p->immediate) {
-		tmp = dma_fence_get(f);
-		swap(p->vm->last_immediate, f);
+	if (p->unlocked) {
+		struct dma_fence *tmp = dma_fence_get(f);
+
+		swap(p->vm->last_unlocked, f);
 		dma_fence_put(tmp);
 	} else {
-		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
-					  f);
+		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
 	}
 
 	if (fence && !p->immediate)
-- 
2.17.1


* Re: [PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
From: Felix Kuehling @ 2020-04-07 19:24 UTC
  To: Christian König, amd-gfx, Alex.Sierra

On 2020-04-07 at 10:27 a.m., Christian König wrote:
> For HMM support we need the ability to invalidate PTEs from
> an MM callback where we can't lock the root PD.
>
> Add a new flag to better support this instead of assuming
> that all invalidation updates are unlocked.

Thanks for this patch series. It looks good to me. Alex, please take a
look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
Does this work for your MMU notifier implementation? I think this should
be all that's needed for unmapping in an MMU notifier with SVM, where we
won't unmap complete BOs.
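
For illustration, a range unmap from the MMU notifier could then look
roughly like the sketch below. The callback name is made up, and it
assumes amdgpu_vm_bo_update_mapping (currently static in amdgpu_vm.c)
is made callable from the notifier code; only that function and its new
parameters are from this series.

	/* Hypothetical MMU notifier unmap path. immediate=true and
	 * unlocked=true because we run in an MM callback and cannot
	 * reserve the root PD; flags=0/addr=0 write invalid PTEs for
	 * the range.
	 */
	static int svm_range_unmap(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   uint64_t start, uint64_t last)
	{
		return amdgpu_vm_bo_update_mapping(adev, vm, true, true,
						   NULL, start, last, 0, 0,
						   NULL, NULL);
	}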

It may not work with your initial prototype that was BO-based, though.
For that one you may need to propagate the "unlocked" parameter all the
way up to amdgpu_vm_bo_update.
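
Concretely that would mean growing its signature along these lines
(hypothetical, the extra parameter does not exist in this series):

	int amdgpu_vm_bo_update(struct amdgpu_device *adev,
				struct amdgpu_bo_va *bo_va, bool clear,
				bool unlocked);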

The series is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>



* Re: [PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
From: Christian König @ 2020-04-08  7:17 UTC
  To: Felix Kuehling, amd-gfx, Alex.Sierra

On 2020-04-07 at 21:24, Felix Kuehling wrote:
> On 2020-04-07 at 10:27 a.m., Christian König wrote:
>> For HMM support we need the ability to invalidate PTEs from
>> an MM callback where we can't lock the root PD.
>>
>> Add a new flag to better support this instead of assuming
>> that all invalidation updates are unlocked.
> Thanks for this patch series. It looks good to me. Alex, please take a
> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
> Does this work for your MMU notifier implementation? I think this should
> be all that's needed for unmapping in an MMU notifier with SVM, where we
> won't unmap complete BOs.
>
> It may not work with your initial prototype that was BO-based, though.
> For that one you may need to propagate the "unlocked" parameter all the
> way up to amdgpu_vm_bo_update.

You can't base the MMU notifier unmap on BOs anyway because you would 
need to lock the BO for that.

Best practice here is probably to call amdgpu_vm_bo_update_mapping() 
directly. Maybe we should rename that function to 
amdgpu_vm_bo_update_range().
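
Just as a sketch, nothing of this exists yet; the renamed, non-static
entry point would simply forward to the current static function:

	/* Hypothetical rename/export so MMU notifier code can
	 * invalidate a range directly, without going through a BO.
	 */
	int amdgpu_vm_bo_update_range(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm, bool immediate,
				      bool unlocked, struct dma_resv *resv,
				      uint64_t start, uint64_t last,
				      uint64_t flags, uint64_t addr,
				      dma_addr_t *pages_addr,
				      struct dma_fence **fence)
	{
		return amdgpu_vm_bo_update_mapping(adev, vm, immediate,
						   unlocked, resv, start,
						   last, flags, addr,
						   pages_addr, fence);
	}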

Regards,
Christian.

>
> The series is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42 ++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>>   3 files changed, 37 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 1ee87b460b96..4ca4f61b34ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   		uint64_t incr, entry_end, pe_start;
>>   		struct amdgpu_bo *pt;
>>   
>> -		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> +		if (!params->unlocked) {
>>   			/* make sure that the page tables covering the
>>   			 * address range are actually allocated
>>   			 */
>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   
>>   		shift = amdgpu_vm_level_shift(adev, cursor.level);
>>   		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> -		if (adev->asic_type < CHIP_VEGA10 &&
>> -		    (flags & AMDGPU_PTE_VALID)) {
>> +		if (params->unlocked) {
>> +			/* Unlocked updates are only allowed on the leaves */
>> +			if (amdgpu_vm_pt_descendant(adev, &cursor))
>> +				continue;
>> +		} else if (adev->asic_type < CHIP_VEGA10 &&
>> +			   (flags & AMDGPU_PTE_VALID)) {
>>   			/* No huge page support before GMC v9 */
>>   			if (cursor.level != AMDGPU_VM_PTB) {
>>   				if (!amdgpu_vm_pt_descendant(adev, &cursor))
>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>>    * @immediate: immediate submission in a page fault
>> + * @unlocked: unlocked invalidation during MM callback
>>    * @resv: fences we need to sync to
>>    * @start: start of mapped range
>>    * @last: last mapped entry
>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    */
>>   static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   				       struct amdgpu_vm *vm, bool immediate,
>> -				       struct dma_resv *resv,
>> +				       bool unlocked, struct dma_resv *resv,
>>   				       uint64_t start, uint64_t last,
>>   				       uint64_t flags, uint64_t addr,
>>   				       dma_addr_t *pages_addr,
>> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   		goto error_unlock;
>>   	}
>>   
>> -	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> -		struct amdgpu_bo *root = vm->root.base.bo;
>> +	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>> +		struct dma_fence *tmp = dma_fence_get_stub();
>>   
>> -		if (!dma_fence_is_signaled(vm->last_immediate))
>> -			amdgpu_bo_fence(root, vm->last_immediate, true);
>> +		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>> +		swap(vm->last_unlocked, tmp);
>> +		dma_fence_put(tmp);
>>   	}
>>   
>>   	r = vm->update_funcs->prepare(&params, resv, sync_mode);
>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>>   		}
>>   
>>   		last = min((uint64_t)mapping->last, start + max_entries - 1);
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>   						start, last, flags, addr,
>>   						dma_addr, fence);
>>   		if (r)
>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   		    mapping->start < AMDGPU_GMC_HOLE_START)
>>   			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>   						mapping->start, mapping->last,
>>   						init_pte_value, 0, NULL, &f);
>>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>>   		return false;
>>   
>>   	/* Don't evict VM page tables while they are updated */
>> -	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>> +	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>>   		amdgpu_vm_eviction_unlock(bo_base->vm);
>>   		return false;
>>   	}
>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>>   	if (timeout <= 0)
>>   		return timeout;
>>   
>> -	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
>> +	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>>   }
>>   
>>   /**
>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   	else
>>   		vm->update_funcs = &amdgpu_vm_sdma_funcs;
>>   	vm->last_update = NULL;
>> -	vm->last_immediate = dma_fence_get_stub();
>> +	vm->last_unlocked = dma_fence_get_stub();
>>   
>>   	mutex_init(&vm->eviction_lock);
>>   	vm->evicting = false;
>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   	vm->root.base.bo = NULL;
>>   
>>   error_free_delayed:
>> -	dma_fence_put(vm->last_immediate);
>> +	dma_fence_put(vm->last_unlocked);
>>   	drm_sched_entity_destroy(&vm->delayed);
>>   
>>   error_free_immediate:
>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>   		vm->pasid = 0;
>>   	}
>>   
>> -	dma_fence_wait(vm->last_immediate, false);
>> -	dma_fence_put(vm->last_immediate);
>> +	dma_fence_wait(vm->last_unlocked, false);
>> +	dma_fence_put(vm->last_unlocked);
>>   
>>   	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>   		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>   		value = 0;
>>   	}
>>   
>> -	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>> -					flags, value, NULL, NULL);
>> +	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>> +					addr + 1, flags, value, NULL, NULL);
>>   	if (r)
>>   		goto error_unlock;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 0b64200ec45f..ea771d84bf2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>>   	 */
>>   	bool immediate;
>>   
>> +	/**
>> +	 * @unlocked: true if the root BO is not locked
>> +	 */
>> +	bool unlocked;
>> +
>>   	/**
>>   	 * @pages_addr:
>>   	 *
>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>>   	struct drm_sched_entity	immediate;
>>   	struct drm_sched_entity	delayed;
>>   
>> -	/* Last submission to the scheduler entities */
>> -	struct dma_fence	*last_immediate;
>> +	/* Last unlocked submission to the scheduler entities */
>> +	struct dma_fence	*last_unlocked;
>>   
>>   	unsigned int		pasid;
>>   	/* dedicated to vm */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> index c78bcebd9378..8d9c6feba660 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>   {
>>   	struct amdgpu_ib *ib = p->job->ibs;
>>   	struct drm_sched_entity *entity;
>> -	struct dma_fence *f, *tmp;
>>   	struct amdgpu_ring *ring;
>> +	struct dma_fence *f;
>>   	int r;
>>   
>>   	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>   	if (r)
>>   		goto error;
>>   
>> -	if (p->immediate) {
>> -		tmp = dma_fence_get(f);
>> -		swap(p->vm->last_immediate, f);
>> +	if (p->unlocked) {
>> +		struct dma_fence *tmp = dma_fence_get(f);
>> +
>> +		swap(p->vm->last_unlocked, f);
>>   		dma_fence_put(tmp);
>>   	} else {
>> -		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>> -					  f);
>> +		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>>   	}
>>   
>>   	if (fence && !p->immediate)


* Re: [PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
  2020-04-08  7:17     ` Christian König
@ 2020-04-13 22:11       ` Felix Kuehling
  0 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2020-04-13 22:11 UTC (permalink / raw)
  To: christian.koenig, amd-gfx, Alex.Sierra, Yang, Philip

[+Philip]

On 2020-04-08 at 3:17 a.m., Christian König wrote:
> On 07.04.20 at 21:24, Felix Kuehling wrote:
>> On 2020-04-07 at 10:27 a.m., Christian König wrote:
>>> For HMM support we need the ability to invalidate PTEs from
>>> a MM callback where we can't lock the root PD.
>>>
>>> Add a new flag to better support this instead of assuming
>>> that all invalidation updates are unlocked.
>> Thanks for this patch series. It looks good to me. Alex, please take a
>> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
>> Does this work for your MMU notifier implementation? I think this should
>> be all that's needed for unmapping in an MMU notifier with SVM, where we
>> won't unmap complete BOs.
>>
>> It may not work with your initial prototype that was BO-based, though.
>> For that one you may need to propagate the "unlocked" parameter all the
>> way up to amdgpu_vm_bo_update.
>
> You can't base the MMU notifier unmap on BOs anyway because you would
> need to lock the BO for that.
>
> Best practice here is probably to call amdgpu_vm_bo_update_mapping()
> directly. Maybe we should rename that function to
> amdgpu_vm_bo_update_range().

That name sounds good to me. While we're at it, I think we should also
rename amdgpu_vm_bo_split_mapping and update its documentation. It no
longer handles splitting the update into IBs at all. Maybe this one
should be renamed to amdgpu_vm_update_mapping.
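
For reference, Christian's amdgpu_vm_bo_update_range() would keep the
parameter list this patch introduces. A rough sketch of the renamed
prototype (illustrative only, not an actual patch):

int amdgpu_vm_bo_update_range(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm, bool immediate,
			      bool unlocked, struct dma_resv *resv,
			      uint64_t start, uint64_t last,
			      uint64_t flags, uint64_t addr,
			      dma_addr_t *pages_addr,
			      struct dma_fence **fence);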

Philip will need to make both those functions non-static for our HMM
range-based memory manager.
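
For illustration, an HMM-side caller could then invalidate a range from
an MMU notifier without reserving the root PD roughly like this (the
flag/addr values and the choice of immediate submission are
placeholders, not something this patch defines):

	/* Hypothetical sketch: unlocked update that clears the PTEs
	 * covering [start, last], analogous to amdgpu_vm_handle_fault().
	 */
	r = amdgpu_vm_bo_update_range(adev, vm, true, true, NULL,
				      start, last, 0, 0, NULL, NULL);
	if (r)
		DRM_ERROR("failed to invalidate range (%d)\n", r);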

Regards,
  Felix


>
> Regards,
> Christian.
>
>>
>> The series is
>>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>
>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42 ++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>>>   3 files changed, 37 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 1ee87b460b96..4ca4f61b34ca 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>  		uint64_t incr, entry_end, pe_start;
>>>  		struct amdgpu_bo *pt;
>>>
>>> -		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> +		if (!params->unlocked) {
>>>  			/* make sure that the page tables covering the
>>>  			 * address range are actually allocated
>>>  			 */
>>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>
>>>  		shift = amdgpu_vm_level_shift(adev, cursor.level);
>>>  		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>>> -		if (adev->asic_type < CHIP_VEGA10 &&
>>> -		    (flags & AMDGPU_PTE_VALID)) {
>>> +		if (params->unlocked) {
>>> +			/* Unlocked updates are only allowed on the leaves */
>>> +			if (amdgpu_vm_pt_descendant(adev, &cursor))
>>> +				continue;
>>> +		} else if (adev->asic_type < CHIP_VEGA10 &&
>>> +			   (flags & AMDGPU_PTE_VALID)) {
>>>  			/* No huge page support before GMC v9 */
>>>  			if (cursor.level != AMDGPU_VM_PTB) {
>>>  				if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>   * @adev: amdgpu_device pointer
>>>   * @vm: requested vm
>>>   * @immediate: immediate submission in a page fault
>>> + * @unlocked: unlocked invalidation during MM callback
>>>   * @resv: fences we need to sync to
>>>   * @start: start of mapped range
>>>   * @last: last mapped entry
>>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>   */
>>>  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>  				       struct amdgpu_vm *vm, bool immediate,
>>> -				       struct dma_resv *resv,
>>> +				       bool unlocked, struct dma_resv *resv,
>>>  				       uint64_t start, uint64_t last,
>>>  				       uint64_t flags, uint64_t addr,
>>>  				       dma_addr_t *pages_addr,
>>> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>  		goto error_unlock;
>>>  	}
>>>
>>> -	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> -		struct amdgpu_bo *root = vm->root.base.bo;
>>> +	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>>> +		struct dma_fence *tmp = dma_fence_get_stub();
>>>
>>> -		if (!dma_fence_is_signaled(vm->last_immediate))
>>> -			amdgpu_bo_fence(root, vm->last_immediate, true);
>>> +		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>>> +		swap(vm->last_unlocked, tmp);
>>> +		dma_fence_put(tmp);
>>>  	}
>>>
>>>  	r = vm->update_funcs->prepare(&params, resv, sync_mode);
>>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>>>  		}
>>>
>>>  		last = min((uint64_t)mapping->last, start + max_entries - 1);
>>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>>  						start, last, flags, addr,
>>>  						dma_addr, fence);
>>>  		if (r)
>>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>  		    mapping->start < AMDGPU_GMC_HOLE_START)
>>>  			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>
>>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>>  						mapping->start, mapping->last,
>>>  						init_pte_value, 0, NULL, &f);
>>>  		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>>>  		return false;
>>>
>>>  	/* Don't evict VM page tables while they are updated */
>>> -	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>>> +	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>>>  		amdgpu_vm_eviction_unlock(bo_base->vm);
>>>  		return false;
>>>  	}
>>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>>>  	if (timeout <= 0)
>>>  		return timeout;
>>>
>>> -	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
>>> +	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>>>  }
>>>
>>>  /**
>>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>  	else
>>>  		vm->update_funcs = &amdgpu_vm_sdma_funcs;
>>>  	vm->last_update = NULL;
>>> -	vm->last_immediate = dma_fence_get_stub();
>>> +	vm->last_unlocked = dma_fence_get_stub();
>>>
>>>  	mutex_init(&vm->eviction_lock);
>>>  	vm->evicting = false;
>>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>  	vm->root.base.bo = NULL;
>>>
>>>  error_free_delayed:
>>> -	dma_fence_put(vm->last_immediate);
>>> +	dma_fence_put(vm->last_unlocked);
>>>  	drm_sched_entity_destroy(&vm->delayed);
>>>
>>>  error_free_immediate:
>>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>>  		vm->pasid = 0;
>>>  	}
>>>
>>> -	dma_fence_wait(vm->last_immediate, false);
>>> -	dma_fence_put(vm->last_immediate);
>>> +	dma_fence_wait(vm->last_unlocked, false);
>>> +	dma_fence_put(vm->last_unlocked);
>>>
>>>  	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>>  		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>>  		value = 0;
>>>  	}
>>>
>>> -	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>>> -					flags, value, NULL, NULL);
>>> +	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>>> +					addr + 1, flags, value, NULL, NULL);
>>>  	if (r)
>>>  		goto error_unlock;
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 0b64200ec45f..ea771d84bf2b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>>>  	 */
>>>  	bool immediate;
>>>
>>> +	/**
>>> +	 * @unlocked: true if the root BO is not locked
>>> +	 */
>>> +	bool unlocked;
>>> +
>>>  	/**
>>>  	 * @pages_addr:
>>>  	 *
>>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>>>  	struct drm_sched_entity	immediate;
>>>  	struct drm_sched_entity	delayed;
>>>
>>> -	/* Last submission to the scheduler entities */
>>> -	struct dma_fence	*last_immediate;
>>> +	/* Last unlocked submission to the scheduler entities */
>>> +	struct dma_fence	*last_unlocked;
>>>
>>>  	unsigned int		pasid;
>>>  	/* dedicated to vm */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> index c78bcebd9378..8d9c6feba660 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>>  {
>>>  	struct amdgpu_ib *ib = p->job->ibs;
>>>  	struct drm_sched_entity *entity;
>>> -	struct dma_fence *f, *tmp;
>>>  	struct amdgpu_ring *ring;
>>> +	struct dma_fence *f;
>>>  	int r;
>>>
>>>  	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>>  	if (r)
>>>  		goto error;
>>>
>>> -	if (p->immediate) {
>>> -		tmp = dma_fence_get(f);
>>> -		swap(p->vm->last_immediate, f);
>>> +	if (p->unlocked) {
>>> +		struct dma_fence *tmp = dma_fence_get(f);
>>> +
>>> +		swap(p->vm->last_unlocked, f);
>>>  		dma_fence_put(tmp);
>>>  	} else {
>>> -		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>>> -					  f);
>>> +		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>>>  	}
>>>
>>>  	if (fence && !p->immediate)
>

