All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdkfd: Rename pm_release_ib() to pm_destroy_runlist_ib()
@ 2019-11-21 21:26 ` Yong Zhao
  0 siblings, 0 replies; 8+ messages in thread
From: Yong Zhao @ 2019-11-21 21:26 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yong Zhao

Its counterparty is called pm_create_runlist_ib(). The new name makes
it easier to navigate in the code.

Accordingly, Add rl_ to the variable names to indicate it is runlist.

Change-Id: Id63bfebeb8a5ed6aaefbebe98858d84724fd26be
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 18 +++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h          |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f7f6df40875e..510f2d1bb8bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1355,7 +1355,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	if (retval)
 		return retval;
 
-	pm_release_ib(&dqm->packets);
+	pm_destroy_runlist_ib(&dqm->packets);
 	dqm->active_runlist = false;
 
 	return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 6cabed06ef5d..4a9433257428 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -98,15 +98,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 	mutex_lock(&pm->lock);
 
 	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
-					&pm->ib_buffer_obj);
+					&pm->rl_ib_obj);
 
 	if (retval) {
 		pr_err("Failed to allocate runlist IB\n");
 		goto out;
 	}
 
-	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
-	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
+	*(void **)rl_buffer = pm->rl_ib_obj->cpu_ptr;
+	*rl_gpu_buffer = pm->rl_ib_obj->gpu_addr;
 
 	memset(*rl_buffer, 0, *rl_buffer_size);
 	pm->allocated = true;
@@ -138,7 +138,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		return retval;
 
 	*rl_size_bytes = alloc_size_bytes;
-	pm->ib_size_bytes = alloc_size_bytes;
+	pm->rl_ib_size_bytes = alloc_size_bytes;
 
 	pr_debug("Building runlist ib process count: %d queues count %d\n",
 		pm->dqm->processes_count, pm->dqm->queue_count);
@@ -149,7 +149,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		/* build map process packet */
 		if (proccesses_mapped >= pm->dqm->processes_count) {
 			pr_debug("Not enough space left in runlist IB\n");
-			pm_release_ib(pm);
+			pm_destroy_runlist_ib(pm);
 			return -ENOMEM;
 		}
 
@@ -337,7 +337,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
-	pm_release_ib(pm);
+	pm_destroy_runlist_ib(pm);
 	return retval;
 }
 
@@ -401,11 +401,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 	return retval;
 }
 
-void pm_release_ib(struct packet_manager *pm)
+void pm_destroy_runlist_ib(struct packet_manager *pm)
 {
 	mutex_lock(&pm->lock);
 	if (pm->allocated) {
-		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
+		kfd_gtt_sa_free(pm->dqm->dev, pm->rl_ib_obj);
 		pm->allocated = false;
 	}
 	mutex_unlock(&pm->lock);
@@ -425,7 +425,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
 	}
 
 	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
-		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+		     pm->rl_ib_obj->cpu_ptr, pm->rl_ib_size_bytes, false);
 
 out:
 	mutex_unlock(&pm->lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 514896bef99a..389cda7c8f1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -937,8 +937,8 @@ struct packet_manager {
 	struct kernel_queue *priv_queue;
 	struct mutex lock;
 	bool allocated;
-	struct kfd_mem_obj *ib_buffer_obj;
-	unsigned int ib_size_bytes;
+	struct kfd_mem_obj *rl_ib_obj;
+	unsigned int rl_ib_size_bytes;
 	bool is_over_subscription;
 
 	const struct packet_manager_funcs *pmf;
@@ -989,7 +989,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine);
 
-void pm_release_ib(struct packet_manager *pm);
+void pm_destroy_runlist_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 1/2] drm/amdkfd: Rename pm_release_ib() to pm_destroy_runlist_ib()
@ 2019-11-21 21:26 ` Yong Zhao
  0 siblings, 0 replies; 8+ messages in thread
From: Yong Zhao @ 2019-11-21 21:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Yong Zhao

Its counterparty is called pm_create_runlist_ib(). The new name makes
it easier to navigate in the code.

Accordingly, Add rl_ to the variable names to indicate it is runlist.

Change-Id: Id63bfebeb8a5ed6aaefbebe98858d84724fd26be
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 18 +++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h          |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f7f6df40875e..510f2d1bb8bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1355,7 +1355,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	if (retval)
 		return retval;
 
-	pm_release_ib(&dqm->packets);
+	pm_destroy_runlist_ib(&dqm->packets);
 	dqm->active_runlist = false;
 
 	return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 6cabed06ef5d..4a9433257428 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -98,15 +98,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 	mutex_lock(&pm->lock);
 
 	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
-					&pm->ib_buffer_obj);
+					&pm->rl_ib_obj);
 
 	if (retval) {
 		pr_err("Failed to allocate runlist IB\n");
 		goto out;
 	}
 
-	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
-	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
+	*(void **)rl_buffer = pm->rl_ib_obj->cpu_ptr;
+	*rl_gpu_buffer = pm->rl_ib_obj->gpu_addr;
 
 	memset(*rl_buffer, 0, *rl_buffer_size);
 	pm->allocated = true;
@@ -138,7 +138,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		return retval;
 
 	*rl_size_bytes = alloc_size_bytes;
-	pm->ib_size_bytes = alloc_size_bytes;
+	pm->rl_ib_size_bytes = alloc_size_bytes;
 
 	pr_debug("Building runlist ib process count: %d queues count %d\n",
 		pm->dqm->processes_count, pm->dqm->queue_count);
@@ -149,7 +149,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		/* build map process packet */
 		if (proccesses_mapped >= pm->dqm->processes_count) {
 			pr_debug("Not enough space left in runlist IB\n");
-			pm_release_ib(pm);
+			pm_destroy_runlist_ib(pm);
 			return -ENOMEM;
 		}
 
@@ -337,7 +337,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
-	pm_release_ib(pm);
+	pm_destroy_runlist_ib(pm);
 	return retval;
 }
 
@@ -401,11 +401,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 	return retval;
 }
 
-void pm_release_ib(struct packet_manager *pm)
+void pm_destroy_runlist_ib(struct packet_manager *pm)
 {
 	mutex_lock(&pm->lock);
 	if (pm->allocated) {
-		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
+		kfd_gtt_sa_free(pm->dqm->dev, pm->rl_ib_obj);
 		pm->allocated = false;
 	}
 	mutex_unlock(&pm->lock);
@@ -425,7 +425,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
 	}
 
 	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
-		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+		     pm->rl_ib_obj->cpu_ptr, pm->rl_ib_size_bytes, false);
 
 out:
 	mutex_unlock(&pm->lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 514896bef99a..389cda7c8f1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -937,8 +937,8 @@ struct packet_manager {
 	struct kernel_queue *priv_queue;
 	struct mutex lock;
 	bool allocated;
-	struct kfd_mem_obj *ib_buffer_obj;
-	unsigned int ib_size_bytes;
+	struct kfd_mem_obj *rl_ib_obj;
+	unsigned int rl_ib_size_bytes;
 	bool is_over_subscription;
 
 	const struct packet_manager_funcs *pmf;
@@ -989,7 +989,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine);
 
-void pm_release_ib(struct packet_manager *pm);
+void pm_destroy_runlist_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-21 21:26     ` Yong Zhao
  0 siblings, 0 replies; 8+ messages in thread
From: Yong Zhao @ 2019-11-21 21:26 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yong Zhao

This is consistent with the calling sequence in unmap_queues_cpsch().

Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 510f2d1bb8bb..fd7d90136b94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
 static int map_queues_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;
+	uint64_t rl_ib_gpu_addr;
+	size_t rl_ib_size;
 
 	if (!dqm->sched_running)
 		return 0;
@@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
 	if (dqm->active_runlist)
 		return 0;
 
-	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
+				&rl_ib_gpu_addr, &rl_ib_size);
+	if (retval)
+		goto fail_create_runlist_ib;
+
+	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
+
+	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
+			rl_ib_gpu_addr, rl_ib_size);
 	pr_debug("%s sent runlist\n", __func__);
 	if (retval) {
 		pr_err("failed to execute runlist\n");
-		return retval;
+		goto fail_create_runlist_ib;
 	}
 	dqm->active_runlist = true;
 
 	return retval;
+
+fail_create_runlist_ib:
+	pm_destroy_runlist_ib(&dqm->packets);
+	return retval;
 }
 
 /* dqm->lock mutex has to be locked before calling this function */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 4a9433257428..6ec54e9f9392 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 	return retval;
 }
 
-static int pm_create_runlist_ib(struct packet_manager *pm,
+int pm_create_runlist_ib(struct packet_manager *pm,
 				struct list_head *queues,
 				uint64_t *rl_gpu_addr,
 				size_t *rl_size_bytes)
@@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		/* build map process packet */
 		if (proccesses_mapped >= pm->dqm->processes_count) {
 			pr_debug("Not enough space left in runlist IB\n");
-			pm_destroy_runlist_ib(pm);
 			return -ENOMEM;
 		}
 
@@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
 	return retval;
 }
 
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
 {
-	uint64_t rl_gpu_ib_addr;
 	uint32_t *rl_buffer;
-	size_t rl_ib_size, packet_size_dwords;
+	size_t packet_size_dwords;
 	int retval;
 
-	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
-					&rl_ib_size);
-	if (retval)
-		goto fail_create_runlist_ib;
-
-	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
-
 	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
 	mutex_lock(&pm->lock);
 
@@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	if (retval)
 		goto fail_acquire_packet_buffer;
 
-	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
+	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
 					rl_ib_size / sizeof(uint32_t), false);
 	if (retval)
 		goto fail_create_runlist;
@@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	kq_rollback_packet(pm->priv_queue);
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
-fail_create_runlist_ib:
-	pm_destroy_runlist_ib(pm);
 	return retval;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 389cda7c8f1a..6accb605b9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
 void pm_uninit(struct packet_manager *pm);
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res);
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 				uint32_t fence_value);
 
@@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine);
 
+int pm_create_runlist_ib(struct packet_manager *pm,
+				struct list_head *queues,
+				uint64_t *rl_gpu_addr,
+				size_t *rl_size_bytes);
 void pm_destroy_runlist_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-21 21:26     ` Yong Zhao
  0 siblings, 0 replies; 8+ messages in thread
From: Yong Zhao @ 2019-11-21 21:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Yong Zhao

This is consistent with the calling sequence in unmap_queues_cpsch().

Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 510f2d1bb8bb..fd7d90136b94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
 static int map_queues_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;
+	uint64_t rl_ib_gpu_addr;
+	size_t rl_ib_size;
 
 	if (!dqm->sched_running)
 		return 0;
@@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
 	if (dqm->active_runlist)
 		return 0;
 
-	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
+				&rl_ib_gpu_addr, &rl_ib_size);
+	if (retval)
+		goto fail_create_runlist_ib;
+
+	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
+
+	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
+			rl_ib_gpu_addr, rl_ib_size);
 	pr_debug("%s sent runlist\n", __func__);
 	if (retval) {
 		pr_err("failed to execute runlist\n");
-		return retval;
+		goto fail_create_runlist_ib;
 	}
 	dqm->active_runlist = true;
 
 	return retval;
+
+fail_create_runlist_ib:
+	pm_destroy_runlist_ib(&dqm->packets);
+	return retval;
 }
 
 /* dqm->lock mutex has to be locked before calling this function */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 4a9433257428..6ec54e9f9392 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 	return retval;
 }
 
-static int pm_create_runlist_ib(struct packet_manager *pm,
+int pm_create_runlist_ib(struct packet_manager *pm,
 				struct list_head *queues,
 				uint64_t *rl_gpu_addr,
 				size_t *rl_size_bytes)
@@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		/* build map process packet */
 		if (proccesses_mapped >= pm->dqm->processes_count) {
 			pr_debug("Not enough space left in runlist IB\n");
-			pm_destroy_runlist_ib(pm);
 			return -ENOMEM;
 		}
 
@@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
 	return retval;
 }
 
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
 {
-	uint64_t rl_gpu_ib_addr;
 	uint32_t *rl_buffer;
-	size_t rl_ib_size, packet_size_dwords;
+	size_t packet_size_dwords;
 	int retval;
 
-	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
-					&rl_ib_size);
-	if (retval)
-		goto fail_create_runlist_ib;
-
-	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
-
 	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
 	mutex_lock(&pm->lock);
 
@@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	if (retval)
 		goto fail_acquire_packet_buffer;
 
-	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
+	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
 					rl_ib_size / sizeof(uint32_t), false);
 	if (retval)
 		goto fail_create_runlist;
@@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	kq_rollback_packet(pm->priv_queue);
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
-fail_create_runlist_ib:
-	pm_destroy_runlist_ib(pm);
 	return retval;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 389cda7c8f1a..6accb605b9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
 void pm_uninit(struct packet_manager *pm);
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res);
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 				uint32_t fence_value);
 
@@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine);
 
+int pm_create_runlist_ib(struct packet_manager *pm,
+				struct list_head *queues,
+				uint64_t *rl_gpu_addr,
+				size_t *rl_size_bytes);
 void pm_destroy_runlist_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-22 21:21         ` Felix Kuehling
  0 siblings, 0 replies; 8+ messages in thread
From: Felix Kuehling @ 2019-11-22 21:21 UTC (permalink / raw)
  To: Yong Zhao, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I'm not sure about this one. Looks like the interface is getting 
needlessly more complicated. Now the caller has to keep track of the 
runlist IB address and size just to pass those to another function. I 
could understand this if there was a use case that needs to separate the 
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The 
runlist IB is continuously executed by the HWS firmware. If the runlist 
is oversubscribed, the HWS firmware will loop through it. So the IB must 
remain allocated until pm_send_unmap_queue is called. Currently 
pm_send_runlist creates the runlist IB and sends it to the HWS. You're 
separating that into creation and sending. Do you see a case where you 
need to send the same runlist multiple times? Or do something else 
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer 
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not 
100% sure because there are some filter parameters that may leave some 
queues mapped. If the two can be combined, I'd suggest the following 
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the 
code. Is there some bigger picture here, some idea of the end-state 
you're trying to get to? Knowing where you're going with this may make 
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>   	int retval;
> +	uint64_t rl_ib_gpu_addr;
> +	size_t rl_ib_size;
>   
>   	if (!dqm->sched_running)
>   		return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>   	if (dqm->active_runlist)
>   		return 0;
>   
> -	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> +	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> +				&rl_ib_gpu_addr, &rl_ib_size);
> +	if (retval)
> +		goto fail_create_runlist_ib;
> +
> +	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> +	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> +			rl_ib_gpu_addr, rl_ib_size);
>   	pr_debug("%s sent runlist\n", __func__);
>   	if (retval) {
>   		pr_err("failed to execute runlist\n");
> -		return retval;
> +		goto fail_create_runlist_ib;
>   	}
>   	dqm->active_runlist = true;
>   
>   	return retval;
> +
> +fail_create_runlist_ib:
> +	pm_destroy_runlist_ib(&dqm->packets);
> +	return retval;
>   }
>   
>   /* dqm->lock mutex has to be locked before calling this function */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4a9433257428..6ec54e9f9392 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -static int pm_create_runlist_ib(struct packet_manager *pm,
> +int pm_create_runlist_ib(struct packet_manager *pm,
>   				struct list_head *queues,
>   				uint64_t *rl_gpu_addr,
>   				size_t *rl_size_bytes)
> @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
>   		/* build map process packet */
>   		if (proccesses_mapped >= pm->dqm->processes_count) {
>   			pr_debug("Not enough space left in runlist IB\n");
> -			pm_destroy_runlist_ib(pm);
>   			return -ENOMEM;
>   		}
>   
> @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
>   {
> -	uint64_t rl_gpu_ib_addr;
>   	uint32_t *rl_buffer;
> -	size_t rl_ib_size, packet_size_dwords;
> +	size_t packet_size_dwords;
>   	int retval;
>   
> -	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
> -					&rl_ib_size);
> -	if (retval)
> -		goto fail_create_runlist_ib;
> -
> -	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
> -
>   	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
>   	mutex_lock(&pm->lock);
>   
> @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	if (retval)
>   		goto fail_acquire_packet_buffer;
>   
> -	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
> +	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
>   					rl_ib_size / sizeof(uint32_t), false);
>   	if (retval)
>   		goto fail_create_runlist;
> @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	kq_rollback_packet(pm->priv_queue);
>   fail_acquire_packet_buffer:
>   	mutex_unlock(&pm->lock);
> -fail_create_runlist_ib:
> -	pm_destroy_runlist_ib(pm);
>   	return retval;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 389cda7c8f1a..6accb605b9f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
>   void pm_uninit(struct packet_manager *pm);
>   int pm_send_set_resources(struct packet_manager *pm,
>   				struct scheduling_resources *res);
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
>   int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
>   				uint32_t fence_value);
>   
> @@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
>   			uint32_t filter_param, bool reset,
>   			unsigned int sdma_engine);
>   
> +int pm_create_runlist_ib(struct packet_manager *pm,
> +				struct list_head *queues,
> +				uint64_t *rl_gpu_addr,
> +				size_t *rl_size_bytes);
>   void pm_destroy_runlist_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-22 21:21         ` Felix Kuehling
  0 siblings, 0 replies; 8+ messages in thread
From: Felix Kuehling @ 2019-11-22 21:21 UTC (permalink / raw)
  To: Yong Zhao, amd-gfx

I'm not sure about this one. Looks like the interface is getting 
needlessly more complicated. Now the caller has to keep track of the 
runlist IB address and size just to pass those to another function. I 
could understand this if there was a use case that needs to separate the 
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The 
runlist IB is continuously executed by the HWS firmware. If the runlist 
is oversubscribed, the HWS firmware will loop through it. So the IB must 
remain allocated until pm_send_unmap_queue is called. Currently 
pm_send_runlist creates the runlist IB and sends it to the HWS. You're 
separating that into creation and sending. Do you see a case where you 
need to send the same runlist multiple times? Or do something else 
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer 
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not 
100% sure because there are some filter parameters that may leave some 
queues mapped. If the two can be combined, I'd suggest the following 
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the 
code. Is there some bigger picture here, some idea of the end-state 
you're trying to get to? Knowing where you're going with this may make 
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>   	int retval;
> +	uint64_t rl_ib_gpu_addr;
> +	size_t rl_ib_size;
>   
>   	if (!dqm->sched_running)
>   		return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>   	if (dqm->active_runlist)
>   		return 0;
>   
> -	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> +	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> +				&rl_ib_gpu_addr, &rl_ib_size);
> +	if (retval)
> +		goto fail_create_runlist_ib;
> +
> +	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> +	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> +			rl_ib_gpu_addr, rl_ib_size);
>   	pr_debug("%s sent runlist\n", __func__);
>   	if (retval) {
>   		pr_err("failed to execute runlist\n");
> -		return retval;
> +		goto fail_create_runlist_ib;
>   	}
>   	dqm->active_runlist = true;
>   
>   	return retval;
> +
> +fail_create_runlist_ib:
> +	pm_destroy_runlist_ib(&dqm->packets);
> +	return retval;
>   }
>   
>   /* dqm->lock mutex has to be locked before calling this function */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4a9433257428..6ec54e9f9392 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -static int pm_create_runlist_ib(struct packet_manager *pm,
> +int pm_create_runlist_ib(struct packet_manager *pm,
>   				struct list_head *queues,
>   				uint64_t *rl_gpu_addr,
>   				size_t *rl_size_bytes)
> @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
>   		/* build map process packet */
>   		if (proccesses_mapped >= pm->dqm->processes_count) {
>   			pr_debug("Not enough space left in runlist IB\n");
> -			pm_destroy_runlist_ib(pm);
>   			return -ENOMEM;
>   		}
>   
> @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
>   {
> -	uint64_t rl_gpu_ib_addr;
>   	uint32_t *rl_buffer;
> -	size_t rl_ib_size, packet_size_dwords;
> +	size_t packet_size_dwords;
>   	int retval;
>   
> -	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
> -					&rl_ib_size);
> -	if (retval)
> -		goto fail_create_runlist_ib;
> -
> -	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
> -
>   	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
>   	mutex_lock(&pm->lock);
>   
> @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	if (retval)
>   		goto fail_acquire_packet_buffer;
>   
> -	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
> +	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
>   					rl_ib_size / sizeof(uint32_t), false);
>   	if (retval)
>   		goto fail_create_runlist;
> @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	kq_rollback_packet(pm->priv_queue);
>   fail_acquire_packet_buffer:
>   	mutex_unlock(&pm->lock);
> -fail_create_runlist_ib:
> -	pm_destroy_runlist_ib(pm);
>   	return retval;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 389cda7c8f1a..6accb605b9f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
>   void pm_uninit(struct packet_manager *pm);
>   int pm_send_set_resources(struct packet_manager *pm,
>   				struct scheduling_resources *res);
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
>   int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
>   				uint32_t fence_value);
>   
> @@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
>   			uint32_t filter_param, bool reset,
>   			unsigned int sdma_engine);
>   
> +int pm_create_runlist_ib(struct packet_manager *pm,
> +				struct list_head *queues,
> +				uint64_t *rl_gpu_addr,
> +				size_t *rl_size_bytes);
>   void pm_destroy_runlist_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-22 21:50             ` Zhao, Yong
  0 siblings, 0 replies; 8+ messages in thread
From: Zhao, Yong @ 2019-11-22 21:50 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 9153 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Hi Felix,

There is no big picture unfortunately, just some improvements that I came to when navigating the code.

Regarding your suggestion, I have a concern. With the original code in unmap_queues_cpsch(), if amdkfd_fence_wait_timeout() fails, we won't release the runlist ib. I am not sure it is by design or just a small bug. If it is by design (probably for debugging when HWS hang), merging pm_send_unmap_queue and pm_release_ib together will break the design.

If we agree to move in that direction, I agree with the part of the name changes because the original names are prone to cause confusion.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Sent: Friday, November 22, 2019 4:21 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

I'm not sure about this one. Looks like the interface is getting
needlessly more complicated. Now the caller has to keep track of the
runlist IB address and size just to pass those to another function. I
could understand this if there was a use case that needs to separate the
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The
runlist IB is continuously executed by the HWS firmware. If the runlist
is oversubscribed, the HWS firmware will loop through it. So the IB must
remain allocated until pm_send_unmap_queue is called. Currently
pm_send_runlist creates the runlist IB and sends it to the HWS. You're
separating that into creation and sending. Do you see a case where you
need to send the same runlist multiple times? Or do something else
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not
100% sure because there are some filter parameters that may leave some
queues mapped. If the two can be combined, I'd suggest the following
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the
code. Is there some bigger picture here, some idea of the end-state
you're trying to get to? Knowing where you're going with this may make
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>        int retval;
> +     uint64_t rl_ib_gpu_addr;
> +     size_t rl_ib_size;
>
>        if (!dqm->sched_running)
>                return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>        if (dqm->active_runlist)
>                return 0;
>
> -     retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> +     retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> +                             &rl_ib_gpu_addr, &rl_ib_size);
> +     if (retval)
> +             goto fail_create_runlist_ib;
> +
> +     pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> +     retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> +                     rl_ib_gpu_addr, rl_ib_size);
>        pr_debug("%s sent runlist\n", __func__);
>        if (retval) {
>                pr_err("failed to execute runlist\n");
> -             return retval;
> +             goto fail_create_runlist_ib;
>        }
>        dqm->active_runlist = true;
>
>        return retval;
> +
> +fail_create_runlist_ib:
> +     pm_destroy_runlist_ib(&dqm->packets);
> +     return retval;
>   }
>
>   /* dqm->lock mutex has to be locked before calling this function */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4a9433257428..6ec54e9f9392 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
>        return retval;
>   }
>
> -static int pm_create_runlist_ib(struct packet_manager *pm,
> +int pm_create_runlist_ib(struct packet_manager *pm,
>                                struct list_head *queues,
>                                uint64_t *rl_gpu_addr,
>                                size_t *rl_size_bytes)
> @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
>                /* build map process packet */
>                if (proccesses_mapped >= pm->dqm->processes_count) {
>                        pr_debug("Not enough space left in runlist IB\n");
> -                     pm_destroy_runlist_ib(pm);
>                        return -ENOMEM;
>                }
>
> @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
>        return retval;
>   }
>
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +                     uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
>   {
> -     uint64_t rl_gpu_ib_addr;
>        uint32_t *rl_buffer;
> -     size_t rl_ib_size, packet_size_dwords;
> +     size_t packet_size_dwords;
>        int retval;
>
> -     retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
> -                                     &rl_ib_size);
> -     if (retval)
> -             goto fail_create_runlist_ib;
> -
> -     pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
> -
>        packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
>        mutex_lock(&pm->lock);
>
> @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>        if (retval)
>                goto fail_acquire_packet_buffer;
>
> -     retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
> +     retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
>                                        rl_ib_size / sizeof(uint32_t), false);
>        if (retval)
>                goto fail_create_runlist;
> @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>        kq_rollback_packet(pm->priv_queue);
>   fail_acquire_packet_buffer:
>        mutex_unlock(&pm->lock);
> -fail_create_runlist_ib:
> -     pm_destroy_runlist_ib(pm);
>        return retval;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 389cda7c8f1a..6accb605b9f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
>   void pm_uninit(struct packet_manager *pm);
>   int pm_send_set_resources(struct packet_manager *pm,
>                                struct scheduling_resources *res);
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +             uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
>   int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
>                                uint32_t fence_value);
>
> @@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
>                        uint32_t filter_param, bool reset,
>                        unsigned int sdma_engine);
>
> +int pm_create_runlist_ib(struct packet_manager *pm,
> +                             struct list_head *queues,
> +                             uint64_t *rl_gpu_addr,
> +                             size_t *rl_size_bytes);
>   void pm_destroy_runlist_ib(struct packet_manager *pm);
>
>   /* Following PM funcs can be shared among VI and AI */

[-- Attachment #1.2: Type: text/html, Size: 17498 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
@ 2019-11-22 21:50             ` Zhao, Yong
  0 siblings, 0 replies; 8+ messages in thread
From: Zhao, Yong @ 2019-11-22 21:50 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 9062 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Hi Felix,

There is no big picture unfortunately, just some improvements that I came to when navigating the code.

Regarding your suggestion, I have a concern. With the original code in unmap_queues_cpsch(), if amdkfd_fence_wait_timeout() fails, we won't release the runlist ib. I am not sure it is by design or just a small bug. If it is by design (probably for debugging when HWS hang), merging pm_send_unmap_queue and pm_release_ib together will break the design.

If we agree to move in that direction, I agree with the part of the name changes because the original names are prone to cause confusion.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Friday, November 22, 2019 4:21 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

I'm not sure about this one. Looks like the interface is getting
needlessly more complicated. Now the caller has to keep track of the
runlist IB address and size just to pass those to another function. I
could understand this if there was a use case that needs to separate the
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The
runlist IB is continuously executed by the HWS firmware. If the runlist
is oversubscribed, the HWS firmware will loop through it. So the IB must
remain allocated until pm_send_unmap_queue is called. Currently
pm_send_runlist creates the runlist IB and sends it to the HWS. You're
separating that into creation and sending. Do you see a case where you
need to send the same runlist multiple times? Or do something else
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not
100% sure because there are some filter parameters that may leave some
queues mapped. If the two can be combined, I'd suggest the following
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the
code. Is there some bigger picture here, some idea of the end-state
you're trying to get to? Knowing where you're going with this may make
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>        int retval;
> +     uint64_t rl_ib_gpu_addr;
> +     size_t rl_ib_size;
>
>        if (!dqm->sched_running)
>                return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>        if (dqm->active_runlist)
>                return 0;
>
> -     retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> +     retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> +                             &rl_ib_gpu_addr, &rl_ib_size);
> +     if (retval)
> +             goto fail_create_runlist_ib;
> +
> +     pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> +     retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> +                     rl_ib_gpu_addr, rl_ib_size);
>        pr_debug("%s sent runlist\n", __func__);
>        if (retval) {
>                pr_err("failed to execute runlist\n");
> -             return retval;
> +             goto fail_create_runlist_ib;
>        }
>        dqm->active_runlist = true;
>
>        return retval;
> +
> +fail_create_runlist_ib:
> +     pm_destroy_runlist_ib(&dqm->packets);
> +     return retval;
>   }
>
>   /* dqm->lock mutex has to be locked before calling this function */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4a9433257428..6ec54e9f9392 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
>        return retval;
>   }
>
> -static int pm_create_runlist_ib(struct packet_manager *pm,
> +int pm_create_runlist_ib(struct packet_manager *pm,
>                                struct list_head *queues,
>                                uint64_t *rl_gpu_addr,
>                                size_t *rl_size_bytes)
> @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
>                /* build map process packet */
>                if (proccesses_mapped >= pm->dqm->processes_count) {
>                        pr_debug("Not enough space left in runlist IB\n");
> -                     pm_destroy_runlist_ib(pm);
>                        return -ENOMEM;
>                }
>
> @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
>        return retval;
>   }
>
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +                     uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
>   {
> -     uint64_t rl_gpu_ib_addr;
>        uint32_t *rl_buffer;
> -     size_t rl_ib_size, packet_size_dwords;
> +     size_t packet_size_dwords;
>        int retval;
>
> -     retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
> -                                     &rl_ib_size);
> -     if (retval)
> -             goto fail_create_runlist_ib;
> -
> -     pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
> -
>        packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
>        mutex_lock(&pm->lock);
>
> @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>        if (retval)
>                goto fail_acquire_packet_buffer;
>
> -     retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
> +     retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
>                                        rl_ib_size / sizeof(uint32_t), false);
>        if (retval)
>                goto fail_create_runlist;
> @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>        kq_rollback_packet(pm->priv_queue);
>   fail_acquire_packet_buffer:
>        mutex_unlock(&pm->lock);
> -fail_create_runlist_ib:
> -     pm_destroy_runlist_ib(pm);
>        return retval;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 389cda7c8f1a..6accb605b9f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
>   void pm_uninit(struct packet_manager *pm);
>   int pm_send_set_resources(struct packet_manager *pm,
>                                struct scheduling_resources *res);
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +             uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
>   int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
>                                uint32_t fence_value);
>
> @@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
>                        uint32_t filter_param, bool reset,
>                        unsigned int sdma_engine);
>
> +int pm_create_runlist_ib(struct packet_manager *pm,
> +                             struct list_head *queues,
> +                             uint64_t *rl_gpu_addr,
> +                             size_t *rl_size_bytes);
>   void pm_destroy_runlist_ib(struct packet_manager *pm);
>
>   /* Following PM funcs can be shared among VI and AI */

[-- Attachment #1.2: Type: text/html, Size: 17376 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-11-22 21:50 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-21 21:26 [PATCH 1/2] drm/amdkfd: Rename pm_release_ib() to pm_destroy_runlist_ib() Yong Zhao
2019-11-21 21:26 ` Yong Zhao
     [not found] ` <20191121212639.4057-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-11-21 21:26   ` [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist() Yong Zhao
2019-11-21 21:26     ` Yong Zhao
     [not found]     ` <20191121212639.4057-2-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-11-22 21:21       ` Felix Kuehling
2019-11-22 21:21         ` Felix Kuehling
     [not found]         ` <529f5ed1-481c-4de3-763b-f70fa1e6080c-5C7GfCeVMHo@public.gmane.org>
2019-11-22 21:50           ` Zhao, Yong
2019-11-22 21:50             ` Zhao, Yong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.